// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * allows a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
 */
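/*
 * For orientation: the two implementations above are selected at runtime via
 * static calls. The MSR-based msr_*() helpers are the defaults; on shared
 * memory systems amd_pstate_init() below rewires them to the shmem_*()
 * helpers, roughly as follows (this mirrors the static_call_update() calls
 * made later in this file):
 *
 *	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
 *		static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
 *		static_call_update(amd_pstate_init_perf, shmem_init_perf);
 *		static_call_update(amd_pstate_update_perf, shmem_update_perf);
 *	}
 */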
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/topology.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

#include "amd-pstate.h"
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY		20000
#define AMD_PSTATE_TRANSITION_DELAY		1000
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY	600

#define AMD_CPPC_EPP_PERFORMANCE		0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
#define AMD_CPPC_EPP_POWERSAVE			0xFF
static const char * const amd_pstate_mode_string[] = {
	[AMD_PSTATE_UNDEFINED]   = "undefined",
	[AMD_PSTATE_DISABLE]     = "disable",
	[AMD_PSTATE_PASSIVE]     = "passive",
	[AMD_PSTATE_ACTIVE]      = "active",
	[AMD_PSTATE_GUIDED]      = "guided",
};

const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
{
	if (mode < 0 || mode >= AMD_PSTATE_MAX)
		return NULL;

	return amd_pstate_mode_string[mode];
}
EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
 * the frequency that a core is going to operate during
 * short periods of activity. EPP values will be utilized for
 * different OS profiles (balanced, performance, power savings).
 * Display strings corresponding to EPP index in the
 * energy_perf_strings[]:
 *	index		string
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
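/*
 * Note: on the MSR (X86_FEATURE_CPPC) path the EPP hint is an 8-bit field in
 * bits [31:24] of MSR_AMD_CPPC_REQ, where 0x00 biases fully towards
 * performance and 0xFF fully towards energy savings (see the AMD_CPPC_EPP_*
 * values above). A cached request value is decoded as in
 * amd_pstate_get_epp() below:
 *
 *	epp = (cppc_req_cached >> 24) & 0xFF;
 */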
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};

static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};
static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0,
	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};

typedef int (*cppc_mode_transition_fn)(int);
static struct quirk_entry quirk_amd_7k62 = {
	.nominal_freq = 2600,
};

static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
	/*
	 * Match the broken BIOS for family 17h processors supporting CPPC V2;
	 * these BIOSes lack the nominal_freq and lowest_freq capability
	 * definitions in the ACPI tables.
	 */
	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
		quirks = dmi->driver_data;
		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
		return 1;
	}

	return 0;
}
static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
	{
		.callback = dmi_matched_7k62_bios_bug,
		.ident = "AMD EPYC 7K62",
		.matches = {
			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
			DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
		},
		.driver_data = &quirk_amd_7k62,
	},
	{}
};
MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
static inline int get_mode_idx_from_str(const char *str, size_t size)
{
	int i;

	for (i = 0; i < AMD_PSTATE_MAX; i++) {
		if (!strncmp(str, amd_pstate_mode_string[i], size))
			return i;
	}

	return -EINVAL;
}

static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
{
	u64 epp;
	int ret;

	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		if (!cppc_req_cached) {
			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
					    &cppc_req_cached);
			if (epp)
				return epp;
		}
		epp = (cppc_req_cached >> 24) & 0xFF;
	} else {
		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
		if (ret < 0) {
			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
			return -EIO;
		}
	}

	return (s16)(epp & 0xff);
}
static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
{
	s16 epp;
	int index = -EINVAL;

	epp = amd_pstate_get_epp(cpudata, 0);
	if (epp < 0)
		return epp;

	switch (epp) {
	case AMD_CPPC_EPP_PERFORMANCE:
		index = EPP_INDEX_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
		index = EPP_INDEX_BALANCE_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
		index = EPP_INDEX_BALANCE_POWERSAVE;
		break;
	case AMD_CPPC_EPP_POWERSAVE:
		index = EPP_INDEX_POWERSAVE;
		break;
	default:
		break;
	}

	return index;
}
static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			    u32 des_perf, u32 max_perf, bool fast_switch)
{
	if (fast_switch)
		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
	else
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
			      READ_ONCE(cpudata->cppc_req_cached));
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);

static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
					  u32 min_perf, u32 des_perf,
					  u32 max_perf, bool fast_switch)
{
	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
					    max_perf, fast_switch);
}
static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	struct cppc_perf_ctrls perf_ctrls;
	int ret;

	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		u64 value = READ_ONCE(cpudata->cppc_req_cached);

		value &= ~GENMASK_ULL(31, 24);
		value |= (u64)epp << 24;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
		if (!ret)
			cpudata->epp_cached = epp;
	} else {
		amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
				       cpudata->max_limit_perf, false);

		perf_ctrls.energy_perf = epp;
		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
		if (ret) {
			pr_debug("failed to set energy perf value (%d)\n", ret);
			return ret;
		}
		cpudata->epp_cached = epp;
	}

	return ret;
}
static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
					    int pref_index)
{
	int epp;
	int ret;

	if (!pref_index)
		epp = cpudata->epp_default;
	else
		epp = epp_values[pref_index];

	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
		pr_debug("EPP cannot be set under performance policy\n");
		return -EBUSY;
	}

	ret = amd_pstate_set_epp(cpudata, epp);

	return ret;
}
static inline int msr_cppc_enable(bool enable)
{
	int ret, cpu;
	unsigned long logical_proc_id_mask = 0;

	/*
	 * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared.
	 */
	if (!enable)
		return 0;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		unsigned long logical_id = topology_logical_package_id(cpu);

		if (test_bit(logical_id, &logical_proc_id_mask))
			continue;

		set_bit(logical_id, &logical_proc_id_mask);

		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
					 enable);
		if (ret)
			return ret;
	}

	cppc_enabled = enable;
	return 0;
}
static int shmem_cppc_enable(bool enable)
{
	int cpu, ret = 0;
	struct cppc_perf_ctrls perf_ctrls;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;

		/* Enable autonomous mode for EPP */
		if (cppc_state == AMD_PSTATE_ACTIVE) {
			/* Set desired perf as zero to allow EPP firmware control */
			perf_ctrls.desired_perf = 0;
			ret = cppc_set_perf(cpu, &perf_ctrls);
			if (ret)
				return ret;
		}
	}

	cppc_enabled = enable;
	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);

static inline int amd_pstate_cppc_enable(bool enable)
{
	return static_call(amd_pstate_cppc_enable)(enable);
}
static int msr_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
	WRITE_ONCE(cpudata->max_limit_perf, AMD_CPPC_HIGHEST_PERF(cap1));
	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));

	return 0;
}
static int shmem_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, cppc_perf.highest_perf);
	WRITE_ONCE(cpudata->max_limit_perf, cppc_perf.highest_perf);
	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);

	if (cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
	if (ret) {
		pr_warn("failed to get auto_sel, ret: %d\n", ret);
		return 0;
	}

	ret = cppc_set_auto_sel(cpudata->cpu,
				(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
	if (ret)
		pr_warn("failed to set auto_sel, ret: %d\n", ret);

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}
static void shmem_update_perf(struct amd_cpudata *cpudata,
			      u32 min_perf, u32 des_perf,
			      u32 max_perf, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	cppc_set_perf(cpudata->cpu, &perf_ctrls);
}
static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
{
	u64 aperf, mperf, tsc;
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();

	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}

	local_irq_restore(flags);

	cpudata->cur.aperf = aperf;
	cpudata->cur.mperf = mperf;
	cpudata->cur.tsc = tsc;
	cpudata->cur.aperf -= cpudata->prev.aperf;
	cpudata->cur.mperf -= cpudata->prev.mperf;
	cpudata->cur.tsc -= cpudata->prev.tsc;

	cpudata->prev.aperf = aperf;
	cpudata->prev.mperf = mperf;
	cpudata->prev.tsc = tsc;

	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);

	return true;
}
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
{
	unsigned long max_freq;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
	u64 value = prev;

	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	max_freq = READ_ONCE(cpudata->max_limit_freq);
	policy->cur = div_u64(des_perf * max_freq, max_perf);

	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
		min_perf = des_perf;
		des_perf = 0;
	}

	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(des_perf);

	/* limit the max perf when core performance boost feature is disabled */
	if (!cpudata->boost_supported)
		max_perf = min_t(unsigned long, nominal_perf, max_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
				      cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
				      cpudata->cpu, (value != prev), fast_switch);
	}

	if (value == prev)
		goto cpufreq_policy_put;

	WRITE_ONCE(cpudata->cppc_req_cached, value);

	amd_pstate_update_perf(cpudata, min_perf, des_perf,
			       max_perf, fast_switch);

cpufreq_policy_put:
	cpufreq_cpu_put(policy);
}
static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
{
	/*
	 * Initialize the lower frequency limit (i.e. policy->min) with
	 * lowest_nonlinear_frequency, which is the most energy efficient
	 * frequency. Override the initial value set by the cpufreq core and
	 * the amd-pstate qos_requests.
	 */
	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
		struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
		struct amd_cpudata *cpudata;

		if (!policy)
			return -EINVAL;

		cpudata = policy->driver_data;
		policy_data->min = cpudata->lowest_nonlinear_freq;
		cpufreq_cpu_put(policy);
	}

	cpufreq_verify_within_cpu_limits(policy_data);
	pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);

	return 0;
}
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
	u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->boost_supported && !policy->boost_enabled)
		max_perf = READ_ONCE(cpudata->nominal_perf);
	else
		max_perf = READ_ONCE(cpudata->highest_perf);

	max_limit_perf = div_u64(policy->max * max_perf, policy->cpuinfo.max_freq);
	min_limit_perf = div_u64(policy->min * max_perf, policy->cpuinfo.max_freq);

	lowest_perf = READ_ONCE(cpudata->lowest_perf);
	if (min_limit_perf < lowest_perf)
		min_limit_perf = lowest_perf;

	if (max_limit_perf < min_limit_perf)
		max_limit_perf = min_limit_perf;

	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
	WRITE_ONCE(cpudata->min_limit_freq, policy->min);

	return 0;
}
static int amd_pstate_update_freq(struct cpufreq_policy *policy,
				  unsigned int target_freq, bool fast_switch)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	WARN_ON(fast_switch && !policy->fast_switch_enabled);
	/*
	 * If fast_switch is desired, then there aren't any registered
	 * transition notifiers. See comment for
	 * cpufreq_enable_fast_switch().
	 */
	if (!fast_switch)
		cpufreq_freq_transition_begin(policy, &freqs);

	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, fast_switch, policy->governor->flags);

	if (!fast_switch)
		cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}
static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	return amd_pstate_update_freq(policy, target_freq, false);
}

static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
					   unsigned int target_freq)
{
	if (!amd_pstate_update_freq(policy, target_freq, true))
		return target_freq;

	return policy->cur;
}
static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->lowest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = READ_ONCE(cpudata->max_limit_perf);
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
			  policy->governor->flags);
	cpufreq_cpu_put(policy);
}
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 nominal_freq, max_freq;
	int ret = 0;

	nominal_freq = READ_ONCE(cpudata->nominal_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	if (on)
		policy->cpuinfo.max_freq = max_freq;
	else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
		policy->cpuinfo.max_freq = nominal_freq * 1000;

	policy->max = policy->cpuinfo.max_freq;

	if (cppc_state == AMD_PSTATE_PASSIVE) {
		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
		if (ret < 0)
			pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
	}

	return ret < 0 ? ret : 0;
}
static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EOPNOTSUPP;
	}
	mutex_lock(&amd_pstate_driver_lock);
	ret = amd_pstate_cpu_boost_update(policy, state);
	WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
	policy->boost_enabled = !ret ? state : false;
	refresh_frequency_limits(policy);
	mutex_unlock(&amd_pstate_driver_lock);

	return ret;
}
static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
{
	u64 boost_val;
	int ret;

	/*
	 * If the platform has no CPB support, or CPB is disabled, initialize
	 * the driver's boost_enabled state to false; this is not an error for
	 * the cpufreq core to handle.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_CPB)) {
		pr_debug_once("Boost CPB capabilities not present in the processor\n");
		ret = 0;
		goto exit_err;
	}

	/* at least one CPU supports CPB, even if others fail later on to set up */
	current_pstate_driver->boost_enabled = true;

	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
	if (ret) {
		pr_err_once("failed to read initial CPU boost state!\n");
		goto exit_err;
	}

	if (!(boost_val & MSR_K7_HWCR_CPB_DIS))
		cpudata->boost_supported = true;

	return 0;

exit_err:
	cpudata->boost_supported = false;
	return ret;
}
static void amd_perf_ctl_reset(unsigned int cpu)
{
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}

/*
 * Enabling the amd-pstate preferred core can't be done directly from the
 * cpufreq callbacks due to locking, so queue the work for later.
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);

#define CPPC_MAX_PERF	U8_MAX
static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
	/* user disabled or not detected */
	if (!amd_pstate_prefcore)
		return;

	cpudata->hw_prefcore = true;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu);

	schedule_work(&sched_prefcore_work);
}
static void amd_pstate_update_limits(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;
	u32 prev_high = 0, cur_high = 0;
	int ret;
	bool highest_perf_changed = false;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (!amd_pstate_prefcore)
		return;

	mutex_lock(&amd_pstate_driver_lock);
	ret = amd_get_highest_perf(cpu, &cur_high);
	if (ret)
		goto free_cpufreq_put;

	prev_high = READ_ONCE(cpudata->prefcore_ranking);
	highest_perf_changed = (prev_high != cur_high);
	if (highest_perf_changed) {
		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

		if (cur_high < CPPC_MAX_PERF)
			sched_set_itmt_core_prio((int)cur_high, cpu);
	}

free_cpufreq_put:
	cpufreq_cpu_put(policy);

	if (!highest_perf_changed)
		cpufreq_update_policy(cpu);

	mutex_unlock(&amd_pstate_driver_lock);
}
/*
 * Get the P-state transition delay time from the ACPI tables that the
 * firmware set, instead of using a hardcoded value directly.
 */
static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
{
	u32 transition_delay_ns;

	transition_delay_ns = cppc_get_transition_latency(cpu);
	if (transition_delay_ns == CPUFREQ_ETERNAL) {
		if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC))
			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
		else
			return AMD_PSTATE_TRANSITION_DELAY;
	}

	return transition_delay_ns / NSEC_PER_USEC;
}
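/*
 * Units note: cppc_get_transition_latency() reports nanoseconds, while
 * policy->transition_delay_us is in microseconds, hence the division by
 * NSEC_PER_USEC above. For example, a firmware-reported latency of 20000 ns
 * becomes a 20 us transition delay; the fallback constants
 * (AMD_PSTATE_TRANSITION_DELAY and AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY)
 * are already expressed in microseconds.
 */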
/*
 * Get the P-state transition latency from the ACPI tables that the firmware
 * set, instead of using a hardcoded value directly.
 */
static u32 amd_pstate_get_transition_latency(unsigned int cpu)
{
	u32 transition_latency;

	transition_latency = cppc_get_transition_latency(cpu);
	if (transition_latency == CPUFREQ_ETERNAL)
		return AMD_PSTATE_TRANSITION_LATENCY;

	return transition_latency;
}
/*
 * amd_pstate_init_freq: Initialize the max_freq, min_freq,
 *                       nominal_freq and lowest_nonlinear_freq
 *                       for the @cpudata object.
 *
 * Requires: highest_perf, lowest_perf, nominal_perf and
 *           lowest_nonlinear_perf members of @cpudata to be
 *           initialized.
 *
 * Returns 0 on success, non-zero value on failure.
 */
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
	int ret;
	u32 min_freq, max_freq;
	u64 numerator;
	u32 nominal_perf, nominal_freq;
	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
	u32 boost_ratio, lowest_nonlinear_ratio;
	struct cppc_perf_caps cppc_perf;

	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	if (quirks && quirks->lowest_freq)
		min_freq = quirks->lowest_freq * 1000;
	else
		min_freq = cppc_perf.lowest_freq * 1000;

	if (quirks && quirks->nominal_freq)
		nominal_freq = quirks->nominal_freq;
	else
		nominal_freq = cppc_perf.nominal_freq;

	nominal_perf = READ_ONCE(cpudata->nominal_perf);

	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
	if (ret)
		return ret;

	boost_ratio = div_u64(numerator << SCHED_CAPACITY_SHIFT, nominal_perf);
	max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;

	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
					 nominal_perf);
	lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;

	WRITE_ONCE(cpudata->min_freq, min_freq);
	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
	WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
	WRITE_ONCE(cpudata->max_freq, max_freq);

	/*
	 * The values below need to be initialized correctly, otherwise the driver will fail to load.
	 * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf.
	 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq].
	 * Check _CPC in the ACPI table objects if any values are incorrect.
	 */
	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
		       min_freq, max_freq, nominal_freq * 1000);
		return -EINVAL;
	}

	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) {
		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
		       lowest_nonlinear_freq, min_freq, nominal_freq * 1000);
		return -EINVAL;
	}

	return 0;
}
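/*
 * Worked example with made-up numbers (for illustration only): with
 * nominal_freq = 2000 MHz, nominal_perf = 128 and a boost numerator of 160,
 *
 *	boost_ratio = (160 << SCHED_CAPACITY_SHIFT) / 128 = 1280
 *	max_freq    = (2000 * 1280 >> SCHED_CAPACITY_SHIFT) * 1000
 *	            = 2500000 kHz (2.5 GHz)
 *
 * and with lowest_nonlinear_perf = 64,
 *
 *	lowest_nonlinear_freq = (2000 * 512 >> SCHED_CAPACITY_SHIFT) * 1000
 *	                      = 1000000 kHz (1.0 GHz)
 *
 * i.e. frequencies scale linearly with the perf values relative to the
 * nominal operating point.
 */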
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (cpu_feature_enabled(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_cpudata1;
	}

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto free_cpudata2;
	}

	cpudata->max_limit_freq = max_freq;
	cpudata->min_limit_freq = min_freq;

	policy->driver_data = cpudata;

	if (!current_pstate_driver->adjust_perf)
		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;

	return 0;

free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
	return ret;
}
*policy
)
1039 struct amd_cpudata
*cpudata
= policy
->driver_data
;
1041 freq_qos_remove_request(&cpudata
->req
[1]);
1042 freq_qos_remove_request(&cpudata
->req
[0]);
1043 policy
->fast_switch_possible
= false;
1047 static int amd_pstate_cpu_resume(struct cpufreq_policy
*policy
)
1051 ret
= amd_pstate_cppc_enable(true);
1053 pr_err("failed to enable amd-pstate during resume, return %d\n", ret
);
1058 static int amd_pstate_cpu_suspend(struct cpufreq_policy
*policy
)
1062 ret
= amd_pstate_cppc_enable(false);
1064 pr_err("failed to disable amd-pstate during suspend, return %d\n", ret
);
/* Sysfs attributes */

/*
 * This frequency is to indicate the maximum hardware frequency.
 * If boost is not active but supported, the frequency will be larger than the
 * one in cpuinfo.
 */
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
					char *buf)
{
	int max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_freq = READ_ONCE(cpudata->max_freq);

	return sysfs_emit(buf, "%u\n", max_freq);
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
						     char *buf)
{
	int freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	freq = READ_ONCE(cpudata->lowest_nonlinear_freq);

	return sysfs_emit(buf, "%u\n", freq);
}
/*
 * In some ASICs, the highest_perf is not the one in the _CPC table, so we
 * need to expose it to sysfs.
 */
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
					    char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->highest_perf);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
						char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->prefcore_ranking);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
					   char *buf)
{
	bool hw_prefcore;
	struct amd_cpudata *cpudata = policy->driver_data;

	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);

	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
}
static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int offset = 0;
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sysfs_emit_at(buf, offset, "%s\n",
				     energy_perf_strings[EPP_INDEX_PERFORMANCE]);

	while (energy_perf_strings[i] != NULL)
		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);

	offset += sysfs_emit_at(buf, offset, "\n");

	return offset;
}
static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	char str_preference[21];
	ssize_t ret;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0)
		return -EINVAL;

	mutex_lock(&amd_pstate_limits_lock);
	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
	mutex_unlock(&amd_pstate_limits_lock);

	return ret ?: count;
}
static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int preference;

	preference = amd_pstate_get_energy_pref_index(cpudata);
	if (preference < 0)
		return preference;

	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}
static void amd_pstate_driver_cleanup(void)
{
	amd_pstate_cppc_enable(false);
	cppc_state = AMD_PSTATE_DISABLE;
	current_pstate_driver = NULL;
}
static int amd_pstate_set_driver(int mode_idx)
{
	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
		cppc_state = mode_idx;
		if (cppc_state == AMD_PSTATE_DISABLE)
			pr_info("driver is explicitly disabled\n");

		if (cppc_state == AMD_PSTATE_ACTIVE)
			current_pstate_driver = &amd_pstate_epp_driver;

		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
			current_pstate_driver = &amd_pstate_driver;

		return 0;
	}

	return -EINVAL;
}
static int amd_pstate_register_driver(int mode)
{
	int ret;

	ret = amd_pstate_set_driver(mode);
	if (ret)
		return ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret) {
		pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
		       ret);
		amd_pstate_driver_cleanup();
		return ret;
	}

	ret = cpufreq_register_driver(current_pstate_driver);
	if (ret) {
		amd_pstate_driver_cleanup();
		return ret;
	}

	return 0;
}

static int amd_pstate_unregister_driver(int dummy)
{
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_driver_cleanup();

	return 0;
}
static int amd_pstate_change_mode_without_dvr_change(int mode)
{
	int cpu = 0;

	cppc_state = mode;

	if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	for_each_present_cpu(cpu) {
		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
	}

	return 0;
}

static int amd_pstate_change_driver_mode(int mode)
{
	int ret;

	ret = amd_pstate_unregister_driver(0);
	if (ret)
		return ret;

	ret = amd_pstate_register_driver(mode);
	if (ret)
		return ret;

	return 0;
}
static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
	[AMD_PSTATE_DISABLE]         = {
		[AMD_PSTATE_DISABLE]     = NULL,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_register_driver,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_register_driver,
		[AMD_PSTATE_GUIDED]      = amd_pstate_register_driver,
	},
	[AMD_PSTATE_PASSIVE]         = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = NULL,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]      = amd_pstate_change_mode_without_dvr_change,
	},
	[AMD_PSTATE_ACTIVE]          = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_driver_mode,
		[AMD_PSTATE_ACTIVE]      = NULL,
		[AMD_PSTATE_GUIDED]      = amd_pstate_change_driver_mode,
	},
	[AMD_PSTATE_GUIDED]          = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_mode_without_dvr_change,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]      = NULL,
	},
};
static ssize_t amd_pstate_show_status(char *buf)
{
	if (!current_pstate_driver)
		return sysfs_emit(buf, "disable\n");

	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}

int amd_pstate_update_status(const char *buf, size_t size)
{
	int mode_idx;

	if (size > strlen("passive") || size < strlen("active"))
		return -EINVAL;

	mode_idx = get_mode_idx_from_str(buf, size);

	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
		return -EINVAL;

	if (mode_state_machine[cppc_state][mode_idx])
		return mode_state_machine[cppc_state][mode_idx](mode_idx);

	return 0;
}
EXPORT_SYMBOL_GPL(amd_pstate_update_status);
static ssize_t status_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	ssize_t ret;

	mutex_lock(&amd_pstate_driver_lock);
	ret = amd_pstate_show_status(buf);
	mutex_unlock(&amd_pstate_driver_lock);

	return ret;
}

static ssize_t status_store(struct device *a, struct device_attribute *b,
			    const char *buf, size_t count)
{
	char *p = memchr(buf, '\n', count);
	int ret;

	mutex_lock(&amd_pstate_driver_lock);
	ret = amd_pstate_update_status(buf, p ? p - buf : count);
	mutex_unlock(&amd_pstate_driver_lock);

	return ret < 0 ? ret : count;
}

static ssize_t prefcore_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
}
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);

static struct freq_attr *amd_pstate_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	NULL,
};

static struct freq_attr *amd_pstate_epp_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	&energy_performance_preference,
	&energy_performance_available_preferences,
	NULL,
};

static struct attribute *pstate_global_attributes[] = {
	&dev_attr_status.attr,
	&dev_attr_prefcore.attr,
	NULL
};

static const struct attribute_group amd_pstate_global_attr_group = {
	.name = "amd_pstate",
	.attrs = pstate_global_attributes,
};
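/*
 * For reference: the freq_attr entries above are exposed per policy under
 * /sys/devices/system/cpu/cpufreq/policy<N>/, while the "status" and
 * "prefcore" attributes in amd_pstate_global_attr_group appear under
 * /sys/devices/system/cpu/amd_pstate/ once amd_pstate_init() has created
 * the group on the cpu subsystem root device.
 */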
static bool amd_pstate_acpi_pm_profile_server(void)
{
	switch (acpi_gbl_FADT.preferred_profile) {
	case PM_ENTERPRISE_SERVER:
	case PM_SOHO_SERVER:
	case PM_PERFORMANCE_SERVER:
		return true;
	}
	return false;
}

static bool amd_pstate_acpi_pm_profile_undefined(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
		return true;
	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
		return true;
	return false;
}
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct amd_cpudata *cpudata;
	struct device *dev;
	u64 value;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;
	cpudata->epp_policy = 0;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;
	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	policy->driver_data = cpudata;

	cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);

	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/*
	 * Set the policy to provide a valid fallback value in case
	 * the default cpufreq governor is neither powersave nor performance.
	 */
	if (amd_pstate_acpi_pm_profile_server() ||
	    amd_pstate_acpi_pm_profile_undefined())
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
	else
		policy->policy = CPUFREQ_POLICY_POWERSAVE;

	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
	}

	current_pstate_driver->adjust_perf = NULL;

	return 0;

free_cpudata1:
	kfree(cpudata);
	return ret;
}

static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata) {
		kfree(cpudata);
		policy->driver_data = NULL;
	}

	pr_debug("CPU %d exiting\n", policy->cpu);
}
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 max_perf, min_perf;
	u64 value;
	s16 epp;

	max_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_perf);
	amd_pstate_update_min_max_limit(policy);

	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
			   cpudata->max_limit_perf);
	value = READ_ONCE(cpudata->cppc_req_cached);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		min_perf = min(cpudata->nominal_perf, max_perf);

	/* Initial min/max values for CPPC Performance Controls Register */
	value &= ~AMD_CPPC_MIN_PERF(~0L);
	value |= AMD_CPPC_MIN_PERF(min_perf);

	value &= ~AMD_CPPC_MAX_PERF(~0L);
	value |= AMD_CPPC_MAX_PERF(max_perf);

	/* The CPPC EPP feature requires the desired perf field to be set to zero */
	value &= ~AMD_CPPC_DES_PERF(~0L);
	value |= AMD_CPPC_DES_PERF(0);

	cpudata->epp_policy = cpudata->policy;

	/* Get BIOS pre-defined epp value */
	epp = amd_pstate_get_epp(cpudata, value);
	if (epp < 0) {
		/*
		 * This return value can only be negative for shared_memory
		 * systems where EPP register read/write is not supported.
		 */
		return epp;
	}

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		epp = 0;

	WRITE_ONCE(cpudata->cppc_req_cached, value);
	return amd_pstate_set_epp(cpudata, epp);
}
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpudata->policy = policy->policy;

	ret = amd_pstate_epp_update_limit(policy);
	if (ret)
		return ret;

	/*
	 * policy->cur is never updated with the amd_pstate_epp driver, but it
	 * is used as a stale frequency value. So, keep it within limits.
	 */
	policy->cur = policy->min;

	return 0;
}
static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
{
	struct cppc_perf_ctrls perf_ctrls;
	u64 value, max_perf;
	int ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret)
		pr_err("failed to enable amd pstate during resume, return %d\n", ret);

	value = READ_ONCE(cpudata->cppc_req_cached);
	max_perf = READ_ONCE(cpudata->highest_perf);

	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	} else {
		perf_ctrls.max_perf = max_perf;
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
		cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
	}
}
static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);

	if (cppc_state == AMD_PSTATE_ACTIVE) {
		amd_pstate_epp_reenable(cpudata);
		cpudata->suspended = false;
	}

	return 0;
}
static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	struct cppc_perf_ctrls perf_ctrls;
	int min_perf;
	u64 value;

	min_perf = READ_ONCE(cpudata->lowest_perf);
	value = READ_ONCE(cpudata->cppc_req_cached);

	mutex_lock(&amd_pstate_limits_lock);
	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;

		/* Set max perf same as min perf */
		value &= ~AMD_CPPC_MAX_PERF(~0L);
		value |= AMD_CPPC_MAX_PERF(min_perf);
		value &= ~AMD_CPPC_MIN_PERF(~0L);
		value |= AMD_CPPC_MIN_PERF(min_perf);
		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	} else {
		perf_ctrls.desired_perf = 0;
		perf_ctrls.min_perf = min_perf;
		perf_ctrls.max_perf = min_perf;
		cppc_set_perf(cpudata->cpu, &perf_ctrls);
		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
		cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
	}
	mutex_unlock(&amd_pstate_limits_lock);
}
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);

	if (cpudata->suspended)
		return 0;

	if (cppc_state == AMD_PSTATE_ACTIVE)
		amd_pstate_epp_offline(policy);

	return 0;
}
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	/* avoid suspending when EPP is not enabled */
	if (cppc_state != AMD_PSTATE_ACTIVE)
		return 0;

	/* set this flag to avoid setting core offline */
	cpudata->suspended = true;

	/* disable CPPC in lowlevel firmware */
	ret = amd_pstate_cppc_enable(false);
	if (ret)
		pr_err("failed to suspend, return %d\n", ret);

	return 0;
}
static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->suspended) {
		mutex_lock(&amd_pstate_limits_lock);

		/* enable amd pstate from suspend state */
		amd_pstate_epp_reenable(cpudata);

		mutex_unlock(&amd_pstate_limits_lock);

		cpudata->suspended = false;
	}

	return 0;
}
static struct cpufreq_driver amd_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify		= amd_pstate_verify,
	.target		= amd_pstate_target,
	.fast_switch	= amd_pstate_fast_switch,
	.init		= amd_pstate_cpu_init,
	.exit		= amd_pstate_cpu_exit,
	.suspend	= amd_pstate_cpu_suspend,
	.resume		= amd_pstate_cpu_resume,
	.set_boost	= amd_pstate_set_boost,
	.update_limits	= amd_pstate_update_limits,
	.name		= "amd-pstate",
	.attr		= amd_pstate_attr,
};

static struct cpufreq_driver amd_pstate_epp_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= amd_pstate_verify,
	.setpolicy	= amd_pstate_epp_set_policy,
	.init		= amd_pstate_epp_cpu_init,
	.exit		= amd_pstate_epp_cpu_exit,
	.offline	= amd_pstate_epp_cpu_offline,
	.online		= amd_pstate_epp_cpu_online,
	.suspend	= amd_pstate_epp_suspend,
	.resume		= amd_pstate_epp_resume,
	.update_limits	= amd_pstate_update_limits,
	.set_boost	= amd_pstate_set_boost,
	.name		= "amd-pstate-epp",
	.attr		= amd_pstate_epp_attr,
};
/*
 * CPPC is not supported on family ID 17H with model IDs ranging from 0x10 to 0x2F.
 * Show a debug message to help check whether the CPU has CPPC support when the
 * driver fails to load.
 */
static bool amd_cppc_supported(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);
	bool warn = false;

	if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
		pr_debug_once("CPPC feature is not supported by the processor\n");
		return false;
	}

	/*
	 * If the CPPC feature is disabled in the BIOS for processors
	 * that support MSR-based CPPC, the AMD Pstate driver may not
	 * function correctly.
	 *
	 * For such processors, check the CPPC flag and display a
	 * warning message if the platform supports CPPC.
	 *
	 * Note: The code check below will not abort the driver
	 * registration process because the code is added for
	 * debugging purposes. Besides, it may still be possible for
	 * the driver to work using the shared-memory mechanism.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
		if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
			switch (c->x86_model) {
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN3) ||
			   cpu_feature_enabled(X86_FEATURE_ZEN4)) {
			switch (c->x86_model) {
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
			warn = true;
		}
	}

	if (warn)
		pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n"
			     "Please enable it if your BIOS has the CPPC option.\n");

	return true;
}
static int __init amd_pstate_init(void)
{
	struct device *dev_root;
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	/* show debug message only if CPPC is not supported */
	if (!amd_cppc_supported())
		return -EOPNOTSUPP;

	/* show warning message when BIOS broken or ACPI disabled */
	if (!acpi_cpc_valid()) {
		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	/* check if this machine needs CPPC quirks */
	dmi_check_system(amd_pstate_quirks_table);

	/*
	 * Determine the driver mode from the command line or kernel config.
	 * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED.
	 * Command line options will override the kernel config settings.
	 */
	if (cppc_state == AMD_PSTATE_UNDEFINED) {
		/* Disable on the following configs by default:
		 * 1. Undefined platforms
		 * 2. Server platforms with CPUs older than Family 0x1A.
		 */
		if (amd_pstate_acpi_pm_profile_undefined() ||
		    (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
			pr_info("driver load is disabled, boot with specific mode to enable this\n");
			return -ENODEV;
		}
		/* get driver mode from kernel config option [1:4] */
		cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
	}

	if (cppc_state == AMD_PSTATE_DISABLE) {
		pr_info("driver load is disabled, boot with specific mode to enable this\n");
		return -ENODEV;
	}

	/* capability check */
	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
	} else {
		pr_debug("AMD CPPC shared memory based functionality is supported\n");
		static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
		static_call_update(amd_pstate_init_perf, shmem_init_perf);
		static_call_update(amd_pstate_update_perf, shmem_update_perf);
	}

	ret = amd_pstate_register_driver(cppc_state);
	if (ret) {
		pr_err("failed to register with return %d\n", ret);
		return ret;
	}

	if (amd_pstate_prefcore) {
		ret = amd_detect_prefcore(&amd_pstate_prefcore);
		if (ret)
			return ret;
	}

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
		put_device(dev_root);
		if (ret) {
			pr_err("sysfs attribute export failed with error %d.\n", ret);
			goto global_attr_free;
		}
	}

	return ret;

global_attr_free:
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_cppc_enable(false);
	return ret;
}
device_initcall(amd_pstate_init);
static int __init amd_pstate_param(char *str)
{
	size_t size;
	int mode_idx;

	if (!str)
		return -EINVAL;

	size = strlen(str);
	mode_idx = get_mode_idx_from_str(str, size);

	return amd_pstate_set_driver(mode_idx);
}

static int __init amd_prefcore_param(char *str)
{
	if (!strcmp(str, "disable"))
		amd_pstate_prefcore = false;

	return 0;
}

early_param("amd_pstate", amd_pstate_param);
early_param("amd_prefcore", amd_prefcore_param);

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");