x86/oprofile: Fix bogus GCC-8 warning in nmi_setup()
[cris-mirror.git] / kernel / sched / cpufreq_schedutil.c
blob7936f548e071e201a2125981dedce3e162711a24
1 /*
2 * CPUFreq governor based on scheduler-provided CPU utilization data.
4 * Copyright (C) 2016, Intel Corporation
5 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 #include <linux/cpufreq.h>
15 #include <linux/kthread.h>
16 #include <uapi/linux/sched/types.h>
17 #include <linux/slab.h>
18 #include <trace/events/power.h>
20 #include "sched.h"
22 struct sugov_tunables {
23 struct gov_attr_set attr_set;
24 unsigned int rate_limit_us;
27 struct sugov_policy {
28 struct cpufreq_policy *policy;
30 struct sugov_tunables *tunables;
31 struct list_head tunables_hook;
33 raw_spinlock_t update_lock; /* For shared policies */
34 u64 last_freq_update_time;
35 s64 freq_update_delay_ns;
36 unsigned int next_freq;
37 unsigned int cached_raw_freq;
39 /* The next fields are only needed if fast switch cannot be used. */
40 struct irq_work irq_work;
41 struct kthread_work work;
42 struct mutex work_lock;
43 struct kthread_worker worker;
44 struct task_struct *thread;
45 bool work_in_progress;
47 bool need_freq_update;
50 struct sugov_cpu {
51 struct update_util_data update_util;
52 struct sugov_policy *sg_policy;
53 unsigned int cpu;
55 bool iowait_boost_pending;
56 unsigned int iowait_boost;
57 unsigned int iowait_boost_max;
58 u64 last_update;
60 /* The fields below are only needed when sharing a policy. */
61 unsigned long util_cfs;
62 unsigned long util_dl;
63 unsigned long max;
64 unsigned int flags;
66 /* The field below is for single-CPU policies only. */
67 #ifdef CONFIG_NO_HZ_COMMON
68 unsigned long saved_idle_calls;
69 #endif
72 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
74 /************************ Governor internals ***********************/
76 static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
78 s64 delta_ns;
81 * Since cpufreq_update_util() is called with rq->lock held for
82 * the @target_cpu, our per-cpu data is fully serialized.
84 * However, drivers cannot in general deal with cross-cpu
85 * requests, so while get_next_freq() will work, our
86 * sugov_update_commit() call may not for the fast switching platforms.
88 * Hence stop here for remote requests if they aren't supported
89 * by the hardware, as calculating the frequency is pointless if
90 * we cannot in fact act on it.
92 * For the slow switching platforms, the kthread is always scheduled on
93 * the right set of CPUs and any CPU can find the next frequency and
94 * schedule the kthread.
96 if (sg_policy->policy->fast_switch_enabled &&
97 !cpufreq_can_do_remote_dvfs(sg_policy->policy))
98 return false;
100 if (sg_policy->work_in_progress)
101 return false;
103 if (unlikely(sg_policy->need_freq_update)) {
104 sg_policy->need_freq_update = false;
106 * This happens when limits change, so forget the previous
107 * next_freq value and force an update.
109 sg_policy->next_freq = UINT_MAX;
110 return true;
113 delta_ns = time - sg_policy->last_freq_update_time;
114 return delta_ns >= sg_policy->freq_update_delay_ns;
117 static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
118 unsigned int next_freq)
120 struct cpufreq_policy *policy = sg_policy->policy;
122 if (sg_policy->next_freq == next_freq)
123 return;
125 sg_policy->next_freq = next_freq;
126 sg_policy->last_freq_update_time = time;
128 if (policy->fast_switch_enabled) {
129 next_freq = cpufreq_driver_fast_switch(policy, next_freq);
130 if (!next_freq)
131 return;
133 policy->cur = next_freq;
134 trace_cpu_frequency(next_freq, smp_processor_id());
135 } else {
136 sg_policy->work_in_progress = true;
137 irq_work_queue(&sg_policy->irq_work);
142 * get_next_freq - Compute a new frequency for a given cpufreq policy.
143 * @sg_policy: schedutil policy object to compute the new frequency for.
144 * @util: Current CPU utilization.
145 * @max: CPU capacity.
147 * If the utilization is frequency-invariant, choose the new frequency to be
148 * proportional to it, that is
150 * next_freq = C * max_freq * util / max
152 * Otherwise, approximate the would-be frequency-invariant utilization by
153 * util_raw * (curr_freq / max_freq) which leads to
155 * next_freq = C * curr_freq * util_raw / max
157 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
159 * The lowest driver-supported frequency which is equal or greater than the raw
160 * next_freq (as calculated above) is returned, subject to policy min/max and
161 * cpufreq driver limitations.
163 static unsigned int get_next_freq(struct sugov_policy *sg_policy,
164 unsigned long util, unsigned long max)
166 struct cpufreq_policy *policy = sg_policy->policy;
167 unsigned int freq = arch_scale_freq_invariant() ?
168 policy->cpuinfo.max_freq : policy->cur;
170 freq = (freq + (freq >> 2)) * util / max;
172 if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
173 return sg_policy->next_freq;
174 sg_policy->cached_raw_freq = freq;
175 return cpufreq_driver_resolve_freq(policy, freq);
178 static void sugov_get_util(struct sugov_cpu *sg_cpu)
180 struct rq *rq = cpu_rq(sg_cpu->cpu);
182 sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
183 sg_cpu->util_cfs = cpu_util_cfs(rq);
184 sg_cpu->util_dl = cpu_util_dl(rq);
187 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
190 * Ideally we would like to set util_dl as min/guaranteed freq and
191 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
192 * ready for such an interface. So, we only do the latter for now.
194 return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
197 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
199 if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
200 if (sg_cpu->iowait_boost_pending)
201 return;
203 sg_cpu->iowait_boost_pending = true;
205 if (sg_cpu->iowait_boost) {
206 sg_cpu->iowait_boost <<= 1;
207 if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
208 sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
209 } else {
210 sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
212 } else if (sg_cpu->iowait_boost) {
213 s64 delta_ns = time - sg_cpu->last_update;
215 /* Clear iowait_boost if the CPU apprears to have been idle. */
216 if (delta_ns > TICK_NSEC) {
217 sg_cpu->iowait_boost = 0;
218 sg_cpu->iowait_boost_pending = false;
223 static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
224 unsigned long *max)
226 unsigned int boost_util, boost_max;
228 if (!sg_cpu->iowait_boost)
229 return;
231 if (sg_cpu->iowait_boost_pending) {
232 sg_cpu->iowait_boost_pending = false;
233 } else {
234 sg_cpu->iowait_boost >>= 1;
235 if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
236 sg_cpu->iowait_boost = 0;
237 return;
241 boost_util = sg_cpu->iowait_boost;
242 boost_max = sg_cpu->iowait_boost_max;
244 if (*util * boost_max < *max * boost_util) {
245 *util = boost_util;
246 *max = boost_max;
250 #ifdef CONFIG_NO_HZ_COMMON
251 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
253 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
254 bool ret = idle_calls == sg_cpu->saved_idle_calls;
256 sg_cpu->saved_idle_calls = idle_calls;
257 return ret;
259 #else
260 static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
261 #endif /* CONFIG_NO_HZ_COMMON */
263 static void sugov_update_single(struct update_util_data *hook, u64 time,
264 unsigned int flags)
266 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
267 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
268 struct cpufreq_policy *policy = sg_policy->policy;
269 unsigned long util, max;
270 unsigned int next_f;
271 bool busy;
273 sugov_set_iowait_boost(sg_cpu, time);
274 sg_cpu->last_update = time;
276 if (!sugov_should_update_freq(sg_policy, time))
277 return;
279 busy = sugov_cpu_is_busy(sg_cpu);
281 if (flags & SCHED_CPUFREQ_RT) {
282 next_f = policy->cpuinfo.max_freq;
283 } else {
284 sugov_get_util(sg_cpu);
285 max = sg_cpu->max;
286 util = sugov_aggregate_util(sg_cpu);
287 sugov_iowait_boost(sg_cpu, &util, &max);
288 next_f = get_next_freq(sg_policy, util, max);
290 * Do not reduce the frequency if the CPU has not been idle
291 * recently, as the reduction is likely to be premature then.
293 if (busy && next_f < sg_policy->next_freq) {
294 next_f = sg_policy->next_freq;
296 /* Reset cached freq as next_freq has changed */
297 sg_policy->cached_raw_freq = 0;
300 sugov_update_commit(sg_policy, time, next_f);
303 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
305 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
306 struct cpufreq_policy *policy = sg_policy->policy;
307 unsigned long util = 0, max = 1;
308 unsigned int j;
310 for_each_cpu(j, policy->cpus) {
311 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
312 unsigned long j_util, j_max;
313 s64 delta_ns;
316 * If the CFS CPU utilization was last updated before the
317 * previous frequency update and the time elapsed between the
318 * last update of the CPU utilization and the last frequency
319 * update is long enough, reset iowait_boost and util_cfs, as
320 * they are now probably stale. However, still consider the
321 * CPU contribution if it has some DEADLINE utilization
322 * (util_dl).
324 delta_ns = time - j_sg_cpu->last_update;
325 if (delta_ns > TICK_NSEC) {
326 j_sg_cpu->iowait_boost = 0;
327 j_sg_cpu->iowait_boost_pending = false;
328 j_sg_cpu->util_cfs = 0;
329 if (j_sg_cpu->util_dl == 0)
330 continue;
332 if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
333 return policy->cpuinfo.max_freq;
335 j_max = j_sg_cpu->max;
336 j_util = sugov_aggregate_util(j_sg_cpu);
337 if (j_util * max > j_max * util) {
338 util = j_util;
339 max = j_max;
342 sugov_iowait_boost(j_sg_cpu, &util, &max);
345 return get_next_freq(sg_policy, util, max);
348 static void sugov_update_shared(struct update_util_data *hook, u64 time,
349 unsigned int flags)
351 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
352 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
353 unsigned int next_f;
355 raw_spin_lock(&sg_policy->update_lock);
357 sugov_get_util(sg_cpu);
358 sg_cpu->flags = flags;
360 sugov_set_iowait_boost(sg_cpu, time);
361 sg_cpu->last_update = time;
363 if (sugov_should_update_freq(sg_policy, time)) {
364 if (flags & SCHED_CPUFREQ_RT)
365 next_f = sg_policy->policy->cpuinfo.max_freq;
366 else
367 next_f = sugov_next_freq_shared(sg_cpu, time);
369 sugov_update_commit(sg_policy, time, next_f);
372 raw_spin_unlock(&sg_policy->update_lock);
375 static void sugov_work(struct kthread_work *work)
377 struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
379 mutex_lock(&sg_policy->work_lock);
380 __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
381 CPUFREQ_RELATION_L);
382 mutex_unlock(&sg_policy->work_lock);
384 sg_policy->work_in_progress = false;
387 static void sugov_irq_work(struct irq_work *irq_work)
389 struct sugov_policy *sg_policy;
391 sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
394 * For RT tasks, the schedutil governor shoots the frequency to maximum.
395 * Special care must be taken to ensure that this kthread doesn't result
396 * in the same behavior.
398 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
399 * updated only at the end of the sugov_work() function and before that
400 * the schedutil governor rejects all other frequency scaling requests.
402 * There is a very rare case though, where the RT thread yields right
403 * after the work_in_progress flag is cleared. The effects of that are
404 * neglected for now.
406 kthread_queue_work(&sg_policy->worker, &sg_policy->work);
409 /************************** sysfs interface ************************/
/*
 * Tunables shared by all policies when have_governor_per_policy() is false;
 * NULL while no such tunables object exists.
 */
static struct sugov_tunables *global_tunables;
/* Taken by sugov_init() and sugov_exit() around tunables setup/teardown. */
static DEFINE_MUTEX(global_tunables_lock);
414 static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
416 return container_of(attr_set, struct sugov_tunables, attr_set);
419 static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
421 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
423 return sprintf(buf, "%u\n", tunables->rate_limit_us);
426 static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
427 size_t count)
429 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
430 struct sugov_policy *sg_policy;
431 unsigned int rate_limit_us;
433 if (kstrtouint(buf, 10, &rate_limit_us))
434 return -EINVAL;
436 tunables->rate_limit_us = rate_limit_us;
438 list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
439 sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
441 return count;
444 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
446 static struct attribute *sugov_attributes[] = {
447 &rate_limit_us.attr,
448 NULL
451 static struct kobj_type sugov_tunables_ktype = {
452 .default_attrs = sugov_attributes,
453 .sysfs_ops = &governor_sysfs_ops,
456 /********************** cpufreq governor interface *********************/
458 static struct cpufreq_governor schedutil_gov;
460 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
462 struct sugov_policy *sg_policy;
464 sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
465 if (!sg_policy)
466 return NULL;
468 sg_policy->policy = policy;
469 raw_spin_lock_init(&sg_policy->update_lock);
470 return sg_policy;
/* Release a sugov_policy obtained from sugov_policy_alloc(). */
static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}
478 static int sugov_kthread_create(struct sugov_policy *sg_policy)
480 struct task_struct *thread;
481 struct sched_attr attr = {
482 .size = sizeof(struct sched_attr),
483 .sched_policy = SCHED_DEADLINE,
484 .sched_flags = SCHED_FLAG_SUGOV,
485 .sched_nice = 0,
486 .sched_priority = 0,
488 * Fake (unused) bandwidth; workaround to "fix"
489 * priority inheritance.
491 .sched_runtime = 1000000,
492 .sched_deadline = 10000000,
493 .sched_period = 10000000,
495 struct cpufreq_policy *policy = sg_policy->policy;
496 int ret;
498 /* kthread only required for slow path */
499 if (policy->fast_switch_enabled)
500 return 0;
502 kthread_init_work(&sg_policy->work, sugov_work);
503 kthread_init_worker(&sg_policy->worker);
504 thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
505 "sugov:%d",
506 cpumask_first(policy->related_cpus));
507 if (IS_ERR(thread)) {
508 pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
509 return PTR_ERR(thread);
512 ret = sched_setattr_nocheck(thread, &attr);
513 if (ret) {
514 kthread_stop(thread);
515 pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
516 return ret;
519 sg_policy->thread = thread;
521 /* Kthread is bound to all CPUs by default */
522 if (!policy->dvfs_possible_from_any_cpu)
523 kthread_bind_mask(thread, policy->related_cpus);
525 init_irq_work(&sg_policy->irq_work, sugov_irq_work);
526 mutex_init(&sg_policy->work_lock);
528 wake_up_process(thread);
530 return 0;
533 static void sugov_kthread_stop(struct sugov_policy *sg_policy)
535 /* kthread only required for slow path */
536 if (sg_policy->policy->fast_switch_enabled)
537 return;
539 kthread_flush_worker(&sg_policy->worker);
540 kthread_stop(sg_policy->thread);
541 mutex_destroy(&sg_policy->work_lock);
544 static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
546 struct sugov_tunables *tunables;
548 tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
549 if (tunables) {
550 gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
551 if (!have_governor_per_policy())
552 global_tunables = tunables;
554 return tunables;
557 static void sugov_tunables_free(struct sugov_tunables *tunables)
559 if (!have_governor_per_policy())
560 global_tunables = NULL;
562 kfree(tunables);
565 static int sugov_init(struct cpufreq_policy *policy)
567 struct sugov_policy *sg_policy;
568 struct sugov_tunables *tunables;
569 int ret = 0;
571 /* State should be equivalent to EXIT */
572 if (policy->governor_data)
573 return -EBUSY;
575 cpufreq_enable_fast_switch(policy);
577 sg_policy = sugov_policy_alloc(policy);
578 if (!sg_policy) {
579 ret = -ENOMEM;
580 goto disable_fast_switch;
583 ret = sugov_kthread_create(sg_policy);
584 if (ret)
585 goto free_sg_policy;
587 mutex_lock(&global_tunables_lock);
589 if (global_tunables) {
590 if (WARN_ON(have_governor_per_policy())) {
591 ret = -EINVAL;
592 goto stop_kthread;
594 policy->governor_data = sg_policy;
595 sg_policy->tunables = global_tunables;
597 gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
598 goto out;
601 tunables = sugov_tunables_alloc(sg_policy);
602 if (!tunables) {
603 ret = -ENOMEM;
604 goto stop_kthread;
607 tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
609 policy->governor_data = sg_policy;
610 sg_policy->tunables = tunables;
612 ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
613 get_governor_parent_kobj(policy), "%s",
614 schedutil_gov.name);
615 if (ret)
616 goto fail;
618 out:
619 mutex_unlock(&global_tunables_lock);
620 return 0;
622 fail:
623 policy->governor_data = NULL;
624 sugov_tunables_free(tunables);
626 stop_kthread:
627 sugov_kthread_stop(sg_policy);
629 free_sg_policy:
630 mutex_unlock(&global_tunables_lock);
632 sugov_policy_free(sg_policy);
634 disable_fast_switch:
635 cpufreq_disable_fast_switch(policy);
637 pr_err("initialization failed (error %d)\n", ret);
638 return ret;
641 static void sugov_exit(struct cpufreq_policy *policy)
643 struct sugov_policy *sg_policy = policy->governor_data;
644 struct sugov_tunables *tunables = sg_policy->tunables;
645 unsigned int count;
647 mutex_lock(&global_tunables_lock);
649 count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
650 policy->governor_data = NULL;
651 if (!count)
652 sugov_tunables_free(tunables);
654 mutex_unlock(&global_tunables_lock);
656 sugov_kthread_stop(sg_policy);
657 sugov_policy_free(sg_policy);
658 cpufreq_disable_fast_switch(policy);
661 static int sugov_start(struct cpufreq_policy *policy)
663 struct sugov_policy *sg_policy = policy->governor_data;
664 unsigned int cpu;
666 sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
667 sg_policy->last_freq_update_time = 0;
668 sg_policy->next_freq = UINT_MAX;
669 sg_policy->work_in_progress = false;
670 sg_policy->need_freq_update = false;
671 sg_policy->cached_raw_freq = 0;
673 for_each_cpu(cpu, policy->cpus) {
674 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
676 memset(sg_cpu, 0, sizeof(*sg_cpu));
677 sg_cpu->cpu = cpu;
678 sg_cpu->sg_policy = sg_policy;
679 sg_cpu->flags = 0;
680 sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
683 for_each_cpu(cpu, policy->cpus) {
684 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
686 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
687 policy_is_shared(policy) ?
688 sugov_update_shared :
689 sugov_update_single);
691 return 0;
694 static void sugov_stop(struct cpufreq_policy *policy)
696 struct sugov_policy *sg_policy = policy->governor_data;
697 unsigned int cpu;
699 for_each_cpu(cpu, policy->cpus)
700 cpufreq_remove_update_util_hook(cpu);
702 synchronize_sched();
704 if (!policy->fast_switch_enabled) {
705 irq_work_sync(&sg_policy->irq_work);
706 kthread_cancel_work_sync(&sg_policy->work);
710 static void sugov_limits(struct cpufreq_policy *policy)
712 struct sugov_policy *sg_policy = policy->governor_data;
714 if (!policy->fast_switch_enabled) {
715 mutex_lock(&sg_policy->work_lock);
716 cpufreq_policy_apply_limits(policy);
717 mutex_unlock(&sg_policy->work_lock);
720 sg_policy->need_freq_update = true;
723 static struct cpufreq_governor schedutil_gov = {
724 .name = "schedutil",
725 .owner = THIS_MODULE,
726 .dynamic_switching = true,
727 .init = sugov_init,
728 .exit = sugov_exit,
729 .start = sugov_start,
730 .stop = sugov_stop,
731 .limits = sugov_limits,
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
/* Built only when schedutil is configured as the default governor. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	struct cpufreq_governor *gov = &schedutil_gov;

	return gov;
}
#endif
741 static int __init sugov_register(void)
743 return cpufreq_register_governor(&schedutil_gov);
745 fs_initcall(sugov_register);