// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer-grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * offers a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations:
 * 1) Full MSR Solution and 2) Shared Memory Solution. The X86_FEATURE_CPPC CPU
 * feature flag is used to distinguish between the two.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/topology.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

#include "amd-pstate.h"
#include "amd-pstate-trace.h"
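/*
 * Units of the default transition values defined below, as they are consumed
 * by amd_pstate_get_transition_latency() and
 * amd_pstate_get_transition_delay_us() further down: the latency feeds
 * policy->cpuinfo.transition_latency and is in nanoseconds, while the two
 * delay values feed policy->transition_delay_us and are in microseconds.
 */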
#define AMD_PSTATE_TRANSITION_LATENCY		20000
#define AMD_PSTATE_TRANSITION_DELAY		1000
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY	600
#define AMD_CPPC_EPP_PERFORMANCE		0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
#define AMD_CPPC_EPP_POWERSAVE			0xFF
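/*
 * EPP is an 8-bit hint to the platform: 0x00 requests the strongest bias
 * toward performance and 0xFF the strongest bias toward energy savings,
 * with intermediate values trading one off against the other. The four
 * reference points above back the energy_performance_preference policy
 * strings defined below.
 */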
static const char * const amd_pstate_mode_string[] = {
	[AMD_PSTATE_UNDEFINED]   = "undefined",
	[AMD_PSTATE_DISABLE]     = "disable",
	[AMD_PSTATE_PASSIVE]     = "passive",
	[AMD_PSTATE_ACTIVE]      = "active",
	[AMD_PSTATE_GUIDED]      = "guided",
	NULL,
};
const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
{
	if (mode < 0 || mode >= AMD_PSTATE_MAX)
		return NULL;
	return amd_pstate_mode_string[mode];
}
EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
#define AMD_CPPC_MAX_PERF_MASK	GENMASK(7, 0)
#define AMD_CPPC_MIN_PERF_MASK	GENMASK(15, 8)
#define AMD_CPPC_DES_PERF_MASK	GENMASK(23, 16)
#define AMD_CPPC_EPP_PERF_MASK	GENMASK(31, 24)
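/*
 * Field layout of the MSR_AMD_CPPC_REQ register, as implied by the masks
 * above and assembled by msr_update_perf()/msr_set_epp() below via
 * FIELD_PREP():
 *
 *   bits  0- 7: max_perf
 *   bits  8-15: min_perf
 *   bits 16-23: des_perf
 *   bits 24-31: epp
 */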
/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive
 * the frequency that a core is going to operate during
 * short periods of activity. EPP values will be utilized for
 * different OS profiles (balanced, performance, power savings).
 * The strings displayed for each EPP index are defined in
 * energy_perf_strings[]:
 *	index		string
 *-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};
static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};
static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0,
	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};
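/*
 * Example of how these preferences are exercised from user space, assuming
 * the driver is loaded in active (EPP) mode:
 *
 *   # cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_available_preferences
 *   default performance balance_performance balance_power power
 *   # echo balance_power > /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference
 *
 * The store path maps the string back to an index in energy_perf_strings[]
 * and programs the corresponding epp_values[] entry via amd_pstate_set_epp().
 */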
typedef int (*cppc_mode_transition_fn)(int);
static struct quirk_entry quirk_amd_7k62 = {
	.nominal_freq = 2600,
	.lowest_freq = 550,
};

static struct quirk_entry quirk_amd_mts = {
	.nominal_freq = 3600,
	.lowest_freq = 550,
};
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
	/*
	 * Match the broken BIOS for family 17h processors supporting CPPC V2:
	 * these BIOSes lack the nominal_freq and lowest_freq capability
	 * definitions in the ACPI tables.
	 */
	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
		quirks = dmi->driver_data;
		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
		return 1;
	}

	return 0;
}
static int __init dmi_matched_mts_bios_bug(const struct dmi_system_id *dmi)
{
	/*
	 * Match the broken BIOS for Ryzen 3000 series processors supporting
	 * CPPC V2: these BIOSes lack the nominal_freq and lowest_freq
	 * capability definitions in the ACPI tables.
	 */
	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
		quirks = dmi->driver_data;
		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
		return 1;
	}

	return 0;
}
static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
	{
		.callback = dmi_matched_7k62_bios_bug,
		.ident = "AMD EPYC 7K62",
		.matches = {
			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
			DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
		},
		.driver_data = &quirk_amd_7k62,
	},
	{
		.callback = dmi_matched_mts_bios_bug,
		.ident = "AMD Ryzen 3000",
		.matches = {
			DMI_MATCH(DMI_PRODUCT_NAME, "B450M MORTAR MAX (MS-7B89)"),
			DMI_MATCH(DMI_BIOS_RELEASE, "06/10/2020"),
			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
		},
		.driver_data = &quirk_amd_mts,
	},
	{}
};
MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
static inline int get_mode_idx_from_str(const char *str, size_t size)
{
	int i;

	for (i = 0; i < AMD_PSTATE_MAX; i++) {
		if (!strncmp(str, amd_pstate_mode_string[i], size))
			return i;
	}

	return -EINVAL;
}
static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
static s16 msr_get_epp(struct amd_cpudata *cpudata)
{
	u64 value;
	int ret;

	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
	if (ret < 0) {
		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
		return ret;
	}

	return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value);
}

DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp);
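/*
 * The get_epp/set_epp/update_perf/init_perf/cppc_enable operations come in
 * two flavours: an MSR implementation (msr_*) for CPUs with X86_FEATURE_CPPC,
 * and a shared-memory implementation (shmem_*) that goes through the ACPI
 * CPPC library. The msr_* variants are the static_call defaults;
 * amd_pstate_init() switches the static calls to the shmem_* variants when
 * the MSR interface is not available.
 */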
static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_get_epp)(cpudata);
}
static s16 shmem_get_epp(struct amd_cpudata *cpudata)
{
	u64 epp;
	int ret;

	ret = cppc_get_epp_perf(cpudata->cpu, &epp);
	if (ret < 0) {
		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
		return ret;
	}

	return (s16)(epp & 0xff);
}
static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			   u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
	u64 value, prev;

	value = prev = READ_ONCE(cpudata->cppc_req_cached);

	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

	if (value == prev)
		return 0;

	if (fast_switch) {
		wrmsrl(MSR_AMD_CPPC_REQ, value);
		return 0;
	} else {
		int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);

		if (ret)
			return ret;
	}

	WRITE_ONCE(cpudata->cppc_req_cached, value);
	WRITE_ONCE(cpudata->epp_cached, epp);

	return 0;
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
					 u32 min_perf, u32 des_perf,
					 u32 max_perf, u32 epp,
					 bool fast_switch)
{
	return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
						   max_perf, epp, fast_switch);
}
static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	u64 value, prev;
	int ret;

	value = prev = READ_ONCE(cpudata->cppc_req_cached);
	value &= ~AMD_CPPC_EPP_PERF_MASK;
	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

	if (value == prev)
		return 0;

	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	if (ret) {
		pr_err("failed to set energy perf value (%d)\n", ret);
		return ret;
	}

	/* update both so that msr_update_perf() can effectively check */
	WRITE_ONCE(cpudata->epp_cached, epp);
	WRITE_ONCE(cpudata->cppc_req_cached, value);

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);
static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	return static_call(amd_pstate_set_epp)(cpudata, epp);
}
static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	struct cppc_perf_ctrls perf_ctrls;
	int ret;

	if (epp == cpudata->epp_cached)
		return 0;

	perf_ctrls.energy_perf = epp;
	ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
	if (ret) {
		pr_debug("failed to set energy perf value (%d)\n", ret);
		return ret;
	}
	WRITE_ONCE(cpudata->epp_cached, epp);

	return ret;
}
static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
					    int pref_index)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int epp;

	if (!pref_index)
		epp = cpudata->epp_default;
	else
		epp = epp_values[pref_index];

	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
		pr_debug("EPP cannot be set under performance policy\n");
		return -EBUSY;
	}

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
					  FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
					  policy->boost_enabled);
	}

	return amd_pstate_set_epp(cpudata, epp);
}
static inline int msr_cppc_enable(bool enable)
{
	int ret, cpu;
	unsigned long logical_proc_id_mask = 0;

	/*
	 * MSR_AMD_CPPC_ENABLE is write-once; once set it cannot be cleared.
	 */
	if (!enable)
		return 0;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		unsigned long logical_id = topology_logical_package_id(cpu);

		if (test_bit(logical_id, &logical_proc_id_mask))
			continue;

		set_bit(logical_id, &logical_proc_id_mask);

		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
					 enable);
		if (ret)
			return ret;
	}

	cppc_enabled = enable;
	return 0;
}
static int shmem_cppc_enable(bool enable)
{
	int cpu, ret = 0;
	struct cppc_perf_ctrls perf_ctrls;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;

		/* Enable autonomous mode for EPP */
		if (cppc_state == AMD_PSTATE_ACTIVE) {
			/* Set desired perf as zero to allow EPP firmware control */
			perf_ctrls.desired_perf = 0;
			ret = cppc_set_perf(cpu, &perf_ctrls);
			if (ret)
				return ret;
		}
	}

	cppc_enabled = enable;
	return ret;
}
DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);

static inline int amd_pstate_cppc_enable(bool enable)
{
	return static_call(amd_pstate_cppc_enable)(enable);
}
static int msr_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1, numerator;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, numerator);
	WRITE_ONCE(cpudata->max_limit_perf, numerator);
	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));

	return 0;
}
static int shmem_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u64 numerator;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, numerator);
	WRITE_ONCE(cpudata->max_limit_perf, numerator);
	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);

	if (cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
	if (ret) {
		pr_warn("failed to get auto_sel, ret: %d\n", ret);
		return 0;
	}

	ret = cppc_set_auto_sel(cpudata->cpu,
				(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
	if (ret)
		pr_warn("failed to set auto_sel, ret: %d\n", ret);

	return ret;
}
DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}
static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			     u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	if (cppc_state == AMD_PSTATE_ACTIVE) {
		int ret = shmem_set_epp(cpudata, epp);

		if (ret)
			return ret;
	}

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	return cppc_set_perf(cpudata->cpu, &perf_ctrls);
}
static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
{
	u64 aperf, mperf, tsc;
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();

	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}

	local_irq_restore(flags);

	cpudata->cur.aperf = aperf;
	cpudata->cur.mperf = mperf;
	cpudata->cur.tsc = tsc;
	cpudata->cur.aperf -= cpudata->prev.aperf;
	cpudata->cur.mperf -= cpudata->prev.mperf;
	cpudata->cur.tsc -= cpudata->prev.tsc;

	cpudata->prev.aperf = aperf;
	cpudata->prev.mperf = mperf;
	cpudata->prev.tsc = tsc;

	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);

	return true;
}
static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
{
	unsigned long max_freq;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	max_freq = READ_ONCE(cpudata->max_limit_freq);
	policy->cur = div_u64(des_perf * max_freq, max_perf);

	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
		min_perf = des_perf;
		des_perf = 0;
	}

	/* limit the max perf when core performance boost feature is disabled */
	if (!cpudata->boost_supported)
		max_perf = min_t(unsigned long, nominal_perf, max_perf);

	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
				      cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
				      cpudata->cpu, fast_switch);
	}

	amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);

	cpufreq_cpu_put(policy);
}
static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
{
	/*
	 * Initialize the lower frequency limit (i.e. policy->min) with
	 * lowest_nonlinear_frequency, which is the most energy-efficient
	 * frequency. Override the initial value set by the cpufreq core and
	 * the amd-pstate qos_requests.
	 */
	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
		struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
		struct amd_cpudata *cpudata;

		if (!policy)
			return -EINVAL;

		cpudata = policy->driver_data;
		policy_data->min = cpudata->lowest_nonlinear_freq;
		cpufreq_cpu_put(policy);
	}

	cpufreq_verify_within_cpu_limits(policy_data);
	pr_debug("policy_max=%d, policy_min=%d\n", policy_data->max, policy_data->min);

	return 0;
}
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
	u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_perf = READ_ONCE(cpudata->highest_perf);
	max_freq = READ_ONCE(cpudata->max_freq);
	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
	min_limit_perf = div_u64(policy->min * max_perf, max_freq);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);

	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
	WRITE_ONCE(cpudata->min_limit_freq, policy->min);

	return 0;
}
static int amd_pstate_update_freq(struct cpufreq_policy *policy,
				  unsigned int target_freq, bool fast_switch)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	WARN_ON(fast_switch && !policy->fast_switch_enabled);
	/*
	 * If fast_switch is desired, then there aren't any registered
	 * transition notifiers. See comment for
	 * cpufreq_enable_fast_switch().
	 */
	if (!fast_switch)
		cpufreq_freq_transition_begin(policy, &freqs);

	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, fast_switch, policy->governor->flags);

	if (!fast_switch)
		cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}
static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	return amd_pstate_update_freq(policy, target_freq, false);
}
static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
					   unsigned int target_freq)
{
	if (!amd_pstate_update_freq(policy, target_freq, true))
		return target_freq;
	return policy->cur;
}
static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->lowest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = cpudata->max_limit_perf;
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
			  policy->governor->flags);
	cpufreq_cpu_put(policy);
}
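/*
 * adjust_perf() is the fast path used by the schedutil governor: _min_perf,
 * target_perf and capacity are expressed on the scheduler's capacity scale,
 * so they are rescaled to the CPPC performance scale by multiplying with
 * highest_perf and dividing by capacity before being clamped and handed to
 * amd_pstate_update().
 */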
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 nominal_freq, max_freq;
	int ret = 0;

	nominal_freq = READ_ONCE(cpudata->nominal_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	if (on)
		policy->cpuinfo.max_freq = max_freq;
	else if (policy->cpuinfo.max_freq > nominal_freq)
		policy->cpuinfo.max_freq = nominal_freq;

	policy->max = policy->cpuinfo.max_freq;

	if (cppc_state == AMD_PSTATE_PASSIVE) {
		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
		if (ret < 0)
			pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
	}

	return ret < 0 ? ret : 0;
}
static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EOPNOTSUPP;
	}

	guard(mutex)(&amd_pstate_driver_lock);

	ret = amd_pstate_cpu_boost_update(policy, state);
	refresh_frequency_limits(policy);

	return ret;
}
*cpudata
)
791 * If platform has no CPB support or disable it, initialize current driver
792 * boost_enabled state to be false, it is not an error for cpufreq core to handle.
794 if (!cpu_feature_enabled(X86_FEATURE_CPB
)) {
795 pr_debug_once("Boost CPB capabilities not present in the processor\n");
800 ret
= rdmsrl_on_cpu(cpudata
->cpu
, MSR_K7_HWCR
, &boost_val
);
802 pr_err_once("failed to read initial CPU boost state!\n");
807 if (!(boost_val
& MSR_K7_HWCR_CPB_DIS
))
808 cpudata
->boost_supported
= true;
813 cpudata
->boost_supported
= false;
static void amd_perf_ctl_reset(unsigned int cpu)
{
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
/*
 * Enabling amd-pstate preferred core support can't be done directly from
 * cpufreq callbacks due to locking, so queue the work for later.
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
#define CPPC_MAX_PERF	U8_MAX

static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
	/* user disabled or not detected */
	if (!amd_pstate_prefcore)
		return;

	cpudata->hw_prefcore = true;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);

	schedule_work(&sched_prefcore_work);
}
static void amd_pstate_update_limits(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;
	u32 prev_high = 0, cur_high = 0;
	int ret;
	bool highest_perf_changed = false;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (!amd_pstate_prefcore)
		return;

	guard(mutex)(&amd_pstate_driver_lock);

	ret = amd_get_highest_perf(cpu, &cur_high);
	if (ret)
		goto free_cpufreq_put;

	prev_high = READ_ONCE(cpudata->prefcore_ranking);
	highest_perf_changed = (prev_high != cur_high);
	if (highest_perf_changed) {
		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

		if (cur_high < CPPC_MAX_PERF)
			sched_set_itmt_core_prio((int)cur_high, cpu);
	}

free_cpufreq_put:
	cpufreq_cpu_put(policy);

	if (!highest_perf_changed)
		cpufreq_update_policy(cpu);
}
/*
 * Get the P-state transition delay from the ACPI tables provided by firmware
 * instead of using a hardcoded value.
 */
static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
{
	u32 transition_delay_ns;

	transition_delay_ns = cppc_get_transition_latency(cpu);
	if (transition_delay_ns == CPUFREQ_ETERNAL) {
		if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC))
			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
		else
			return AMD_PSTATE_TRANSITION_DELAY;
	}

	return transition_delay_ns / NSEC_PER_USEC;
}
/*
 * Get the P-state transition latency from the ACPI tables provided by
 * firmware instead of using a hardcoded value.
 */
static u32 amd_pstate_get_transition_latency(unsigned int cpu)
{
	u32 transition_latency;

	transition_latency = cppc_get_transition_latency(cpu);
	if (transition_latency == CPUFREQ_ETERNAL)
		return AMD_PSTATE_TRANSITION_LATENCY;

	return transition_latency;
}
/*
 * amd_pstate_init_freq: Initialize the max_freq, min_freq,
 *                       nominal_freq and lowest_nonlinear_freq
 *                       for the @cpudata object.
 *
 * Requires: highest_perf, lowest_perf, nominal_perf and
 *           lowest_nonlinear_perf members of @cpudata to be
 *           initialized.
 *
 * Returns 0 on success, non-zero value on failure.
 */
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
	int ret;
	u32 min_freq, max_freq;
	u32 highest_perf, nominal_perf, nominal_freq;
	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
	struct cppc_perf_caps cppc_perf;

	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	if (quirks && quirks->lowest_freq)
		min_freq = quirks->lowest_freq;
	else
		min_freq = cppc_perf.lowest_freq;

	if (quirks && quirks->nominal_freq)
		nominal_freq = quirks->nominal_freq;
	else
		nominal_freq = cppc_perf.nominal_freq;

	highest_perf = READ_ONCE(cpudata->highest_perf);
	nominal_perf = READ_ONCE(cpudata->nominal_perf);
	max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);

	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
	WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
	WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
	WRITE_ONCE(cpudata->max_freq, max_freq * 1000);

	/*
	 * The values below need to be initialized correctly, otherwise the
	 * driver will fail to load:
	 * max_freq is calculated as (nominal_freq * highest_perf) / nominal_perf.
	 * lowest_nonlinear_freq must lie within [min_freq, nominal_freq].
	 * Check the _CPC objects in the ACPI tables if any values are incorrect.
	 */
	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
		       min_freq, max_freq, nominal_freq);
		return -EINVAL;
	}

	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
		       lowest_nonlinear_freq, min_freq, nominal_freq);
		return -EINVAL;
	}

	return 0;
}
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (cpu_feature_enabled(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_cpudata1;
	}

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto free_cpudata2;
	}

	cpudata->max_limit_freq = max_freq;
	cpudata->min_limit_freq = min_freq;

	policy->driver_data = cpudata;

	if (!current_pstate_driver->adjust_perf)
		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;

	return 0;

free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
	return ret;
}
*policy
)
1078 struct amd_cpudata
*cpudata
= policy
->driver_data
;
1080 freq_qos_remove_request(&cpudata
->req
[1]);
1081 freq_qos_remove_request(&cpudata
->req
[0]);
1082 policy
->fast_switch_possible
= false;
static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret)
		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);

	return ret;
}

static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_cppc_enable(false);
	if (ret)
		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);

	return ret;
}
/* Sysfs attributes */

/*
 * This frequency indicates the maximum hardware frequency.
 * If boost is not active but supported, the frequency will be larger than the
 * one in cpuinfo.
 */
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
					char *buf)
{
	int max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_freq = READ_ONCE(cpudata->max_freq);
	if (max_freq < 0)
		return max_freq;

	return sysfs_emit(buf, "%u\n", max_freq);
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
						     char *buf)
{
	int freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
	if (freq < 0)
		return freq;

	return sysfs_emit(buf, "%u\n", freq);
}
/*
 * On some ASICs the highest_perf is not the one in the _CPC table, so we need
 * to expose it to sysfs.
 */
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
					    char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->highest_perf);

	return sysfs_emit(buf, "%u\n", perf);
}
static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
						char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->prefcore_ranking);

	return sysfs_emit(buf, "%u\n", perf);
}
static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
					   char *buf)
{
	bool hw_prefcore;
	struct amd_cpudata *cpudata = policy->driver_data;

	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);

	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
}
static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int offset = 0;
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sysfs_emit_at(buf, offset, "%s\n",
				     energy_perf_strings[EPP_INDEX_PERFORMANCE]);

	while (energy_perf_strings[i] != NULL)
		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);

	offset += sysfs_emit_at(buf, offset, "\n");

	return offset;
}
static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	char str_preference[21];
	ssize_t ret;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0)
		return -EINVAL;

	guard(mutex)(&amd_pstate_limits_lock);

	ret = amd_pstate_set_energy_pref_index(policy, ret);

	return ret ? ret : count;
}
static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int preference;

	switch (cpudata->epp_cached) {
	case AMD_CPPC_EPP_PERFORMANCE:
		preference = EPP_INDEX_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
		preference = EPP_INDEX_BALANCE_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
		preference = EPP_INDEX_BALANCE_POWERSAVE;
		break;
	case AMD_CPPC_EPP_POWERSAVE:
		preference = EPP_INDEX_POWERSAVE;
		break;
	default:
		return -EINVAL;
	}

	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}
static void amd_pstate_driver_cleanup(void)
{
	amd_pstate_cppc_enable(false);
	cppc_state = AMD_PSTATE_DISABLE;
	current_pstate_driver = NULL;
}
static int amd_pstate_set_driver(int mode_idx)
{
	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
		cppc_state = mode_idx;
		if (cppc_state == AMD_PSTATE_DISABLE)
			pr_info("driver is explicitly disabled\n");

		if (cppc_state == AMD_PSTATE_ACTIVE)
			current_pstate_driver = &amd_pstate_epp_driver;

		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
			current_pstate_driver = &amd_pstate_driver;

		return 0;
	}

	return -EINVAL;
}
static int amd_pstate_register_driver(int mode)
{
	int ret;

	ret = amd_pstate_set_driver(mode);
	if (ret)
		return ret;

	cppc_state = mode;

	ret = amd_pstate_cppc_enable(true);
	if (ret) {
		pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
		       ret);
		amd_pstate_driver_cleanup();
		return ret;
	}

	/* at least one CPU supports CPB */
	current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);

	ret = cpufreq_register_driver(current_pstate_driver);
	if (ret) {
		amd_pstate_driver_cleanup();
		return ret;
	}

	return 0;
}
)
1302 cpufreq_unregister_driver(current_pstate_driver
);
1303 amd_pstate_driver_cleanup();
1307 static int amd_pstate_change_mode_without_dvr_change(int mode
)
1313 if (cpu_feature_enabled(X86_FEATURE_CPPC
) || cppc_state
== AMD_PSTATE_ACTIVE
)
1316 for_each_present_cpu(cpu
) {
1317 cppc_set_auto_sel(cpu
, (cppc_state
== AMD_PSTATE_PASSIVE
) ? 0 : 1);
1323 static int amd_pstate_change_driver_mode(int mode
)
1327 ret
= amd_pstate_unregister_driver(0);
1331 ret
= amd_pstate_register_driver(mode
);
1338 static cppc_mode_transition_fn mode_state_machine
[AMD_PSTATE_MAX
][AMD_PSTATE_MAX
] = {
1339 [AMD_PSTATE_DISABLE
] = {
1340 [AMD_PSTATE_DISABLE
] = NULL
,
1341 [AMD_PSTATE_PASSIVE
] = amd_pstate_register_driver
,
1342 [AMD_PSTATE_ACTIVE
] = amd_pstate_register_driver
,
1343 [AMD_PSTATE_GUIDED
] = amd_pstate_register_driver
,
1345 [AMD_PSTATE_PASSIVE
] = {
1346 [AMD_PSTATE_DISABLE
] = amd_pstate_unregister_driver
,
1347 [AMD_PSTATE_PASSIVE
] = NULL
,
1348 [AMD_PSTATE_ACTIVE
] = amd_pstate_change_driver_mode
,
1349 [AMD_PSTATE_GUIDED
] = amd_pstate_change_mode_without_dvr_change
,
1351 [AMD_PSTATE_ACTIVE
] = {
1352 [AMD_PSTATE_DISABLE
] = amd_pstate_unregister_driver
,
1353 [AMD_PSTATE_PASSIVE
] = amd_pstate_change_driver_mode
,
1354 [AMD_PSTATE_ACTIVE
] = NULL
,
1355 [AMD_PSTATE_GUIDED
] = amd_pstate_change_driver_mode
,
1357 [AMD_PSTATE_GUIDED
] = {
1358 [AMD_PSTATE_DISABLE
] = amd_pstate_unregister_driver
,
1359 [AMD_PSTATE_PASSIVE
] = amd_pstate_change_mode_without_dvr_change
,
1360 [AMD_PSTATE_ACTIVE
] = amd_pstate_change_driver_mode
,
1361 [AMD_PSTATE_GUIDED
] = NULL
,
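/*
 * mode_state_machine[current][requested] selects the transition handler: rows
 * are the currently active mode (cppc_state) and columns the requested mode.
 * NULL entries are no-ops (the mode is unchanged). Switching between passive
 * and guided only toggles the autonomous-selection setting, so it uses
 * amd_pstate_change_mode_without_dvr_change(); transitions to or from active
 * mode (other than enabling/disabling the driver) re-register the cpufreq
 * driver via amd_pstate_change_driver_mode().
 */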
static ssize_t amd_pstate_show_status(char *buf)
{
	if (!current_pstate_driver)
		return sysfs_emit(buf, "disable\n");

	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}
int amd_pstate_update_status(const char *buf, size_t size)
{
	int mode_idx;

	if (size > strlen("passive") || size < strlen("active"))
		return -EINVAL;

	mode_idx = get_mode_idx_from_str(buf, size);

	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
		return -EINVAL;

	if (mode_state_machine[cppc_state][mode_idx])
		return mode_state_machine[cppc_state][mode_idx](mode_idx);

	return 0;
}
EXPORT_SYMBOL_GPL(amd_pstate_update_status);
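/*
 * The mode can also be changed at run time through the global sysfs attribute
 * group registered below (amd_pstate_global_attr_group), for example:
 *
 *   # cat /sys/devices/system/cpu/amd_pstate/status
 *   active
 *   # echo passive > /sys/devices/system/cpu/amd_pstate/status
 *
 * status_store() strips a trailing newline and hands the string to
 * amd_pstate_update_status().
 */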
static ssize_t status_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	guard(mutex)(&amd_pstate_driver_lock);

	return amd_pstate_show_status(buf);
}
static ssize_t status_store(struct device *a, struct device_attribute *b,
			    const char *buf, size_t count)
{
	char *p = memchr(buf, '\n', count);
	int ret;

	guard(mutex)(&amd_pstate_driver_lock);
	ret = amd_pstate_update_status(buf, p ? p - buf : count);

	return ret < 0 ? ret : count;
}
static ssize_t prefcore_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
}
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);
static struct freq_attr *amd_pstate_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	NULL,
};

static struct freq_attr *amd_pstate_epp_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	&energy_performance_preference,
	&energy_performance_available_preferences,
	NULL,
};
static struct attribute *pstate_global_attributes[] = {
	&dev_attr_status.attr,
	&dev_attr_prefcore.attr,
	NULL
};

static const struct attribute_group amd_pstate_global_attr_group = {
	.name = "amd_pstate",
	.attrs = pstate_global_attributes,
};
static bool amd_pstate_acpi_pm_profile_server(void)
{
	switch (acpi_gbl_FADT.preferred_profile) {
	case PM_ENTERPRISE_SERVER:
	case PM_SOHO_SERVER:
	case PM_PERFORMANCE_SERVER:
		return true;
	}

	return false;
}
static bool amd_pstate_acpi_pm_profile_undefined(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
		return true;
	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
		return true;

	return false;
}
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct amd_cpudata *cpudata;
	struct device *dev;
	u64 value;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;
	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	policy->driver_data = cpudata;

	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/*
	 * Set the policy to provide a valid fallback value in case
	 * the default cpufreq governor is neither powersave nor performance.
	 */
	if (amd_pstate_acpi_pm_profile_server() ||
	    amd_pstate_acpi_pm_profile_undefined()) {
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
		cpudata->epp_default = amd_pstate_get_epp(cpudata);
	} else {
		policy->policy = CPUFREQ_POLICY_POWERSAVE;
		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
	}

	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
	}
	ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
	if (ret)
		return ret;

	current_pstate_driver->adjust_perf = NULL;

	return 0;

free_cpudata1:
	kfree(cpudata);
	return ret;
}
static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata) {
		kfree(cpudata);
		policy->driver_data = NULL;
	}

	pr_debug("CPU %d exiting\n", policy->cpu);
}
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 epp;

	amd_pstate_update_min_max_limit(policy);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		epp = 0;
	else
		epp = READ_ONCE(cpudata->epp_cached);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
					  cpudata->min_limit_perf,
					  cpudata->max_limit_perf,
					  policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
				      cpudata->max_limit_perf, epp, false);
}
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpudata->policy = policy->policy;

	ret = amd_pstate_epp_update_limit(policy);
	if (ret)
		return ret;

	/*
	 * policy->cur is never updated with the amd_pstate_epp driver, but it
	 * is used as a stale frequency value. So, keep it within limits.
	 */
	policy->cur = policy->min;

	return 0;
}
static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u64 max_perf;
	int ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret)
		pr_err("failed to enable amd pstate during resume, return %d\n", ret);

	max_perf = READ_ONCE(cpudata->highest_perf);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  cpudata->epp_cached,
					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
					  max_perf, policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
}
static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);

	ret = amd_pstate_epp_reenable(policy);
	if (ret)
		return ret;
	cpudata->suspended = false;

	return 0;
}
*policy
)
1669 struct amd_cpudata
*cpudata
= policy
->driver_data
;
1672 if (cpudata
->suspended
)
1675 min_perf
= READ_ONCE(cpudata
->lowest_perf
);
1677 guard(mutex
)(&amd_pstate_limits_lock
);
1679 if (trace_amd_pstate_epp_perf_enabled()) {
1680 trace_amd_pstate_epp_perf(cpudata
->cpu
, cpudata
->highest_perf
,
1681 AMD_CPPC_EPP_BALANCE_POWERSAVE
,
1682 min_perf
, min_perf
, policy
->boost_enabled
);
1685 return amd_pstate_update_perf(cpudata
, min_perf
, 0, min_perf
,
1686 AMD_CPPC_EPP_BALANCE_POWERSAVE
, false);
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	/* avoid suspending when EPP is not enabled */
	if (cppc_state != AMD_PSTATE_ACTIVE)
		return 0;

	/* set this flag to avoid setting the core offline */
	cpudata->suspended = true;

	/* disable CPPC in low-level firmware */
	ret = amd_pstate_cppc_enable(false);
	if (ret)
		pr_err("failed to suspend, return %d\n", ret);

	return 0;
}
static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->suspended) {
		guard(mutex)(&amd_pstate_limits_lock);

		/* enable amd pstate from suspend state */
		amd_pstate_epp_reenable(policy);

		cpudata->suspended = false;
	}

	return 0;
}
static struct cpufreq_driver amd_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify		= amd_pstate_verify,
	.target		= amd_pstate_target,
	.fast_switch    = amd_pstate_fast_switch,
	.init		= amd_pstate_cpu_init,
	.exit		= amd_pstate_cpu_exit,
	.suspend	= amd_pstate_cpu_suspend,
	.resume		= amd_pstate_cpu_resume,
	.set_boost	= amd_pstate_set_boost,
	.update_limits	= amd_pstate_update_limits,
	.name		= "amd-pstate",
	.attr		= amd_pstate_attr,
};
static struct cpufreq_driver amd_pstate_epp_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= amd_pstate_verify,
	.setpolicy	= amd_pstate_epp_set_policy,
	.init		= amd_pstate_epp_cpu_init,
	.exit		= amd_pstate_epp_cpu_exit,
	.offline	= amd_pstate_epp_cpu_offline,
	.online		= amd_pstate_epp_cpu_online,
	.suspend	= amd_pstate_epp_suspend,
	.resume		= amd_pstate_epp_resume,
	.update_limits	= amd_pstate_update_limits,
	.set_boost	= amd_pstate_set_boost,
	.name		= "amd-pstate-epp",
	.attr		= amd_pstate_epp_attr,
};
/*
 * The CPPC function is not supported on family 17h CPUs with model IDs in the
 * range 0x10 to 0x2F. Emit a debug message to help determine whether the CPU
 * has CPPC support when investigating loading issues.
 */
static bool amd_cppc_supported(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);
	bool warn = false;

	if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
		pr_debug_once("CPPC feature is not supported by the processor\n");
		return false;
	}

	/*
	 * If the CPPC feature is disabled in the BIOS for processors
	 * that support MSR-based CPPC, the AMD Pstate driver may not
	 * function correctly.
	 *
	 * For such processors, check the CPPC flag and display a
	 * warning message if the platform supports CPPC.
	 *
	 * Note: the check below does not abort driver registration; it exists
	 * for debugging purposes, and the driver may still work through the
	 * shared-memory mechanism.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
		if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
			switch (c->x86_model) {
			case 0x60 ... 0x6F:
			case 0x80 ... 0xAF:
				warn = true;
				break;
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN3) ||
			   cpu_feature_enabled(X86_FEATURE_ZEN4)) {
			switch (c->x86_model) {
			case 0x10 ... 0x1F:
			case 0x40 ... 0xAF:
				warn = true;
				break;
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
			warn = true;
		}
	}

	if (warn)
		pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n"
			     "Please enable it if your BIOS has the CPPC option.\n");
	return true;
}
static int __init amd_pstate_init(void)
{
	struct device *dev_root;
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	/* show debug message only if CPPC is not supported */
	if (!amd_cppc_supported())
		return -EOPNOTSUPP;

	/* show warning message when BIOS broken or ACPI disabled */
	if (!acpi_cpc_valid()) {
		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	/* check if this machine needs CPPC quirks */
	dmi_check_system(amd_pstate_quirks_table);

	/*
	 * Determine the driver mode from the command line or kernel config.
	 * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED.
	 * Command line options override the kernel config settings.
	 */
	if (cppc_state == AMD_PSTATE_UNDEFINED) {
		/* Disable on the following configs by default:
		 * 1. Undefined platforms
		 * 2. Server platforms with CPUs older than Family 0x1A.
		 */
		if (amd_pstate_acpi_pm_profile_undefined() ||
		    (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
			pr_info("driver load is disabled, boot with specific mode to enable this\n");
			return -ENODEV;
		}
		/* get driver mode from kernel config option [1:4] */
		cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
	}

	if (cppc_state == AMD_PSTATE_DISABLE) {
		pr_info("driver load is disabled, boot with specific mode to enable this\n");
		return -ENODEV;
	}

	/* capability check */
	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
	} else {
		pr_debug("AMD CPPC shared memory based functionality is supported\n");
		static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
		static_call_update(amd_pstate_init_perf, shmem_init_perf);
		static_call_update(amd_pstate_update_perf, shmem_update_perf);
		static_call_update(amd_pstate_get_epp, shmem_get_epp);
		static_call_update(amd_pstate_set_epp, shmem_set_epp);
	}

	if (amd_pstate_prefcore) {
		ret = amd_detect_prefcore(&amd_pstate_prefcore);
		if (ret)
			return ret;
	}

	ret = amd_pstate_register_driver(cppc_state);
	if (ret) {
		pr_err("failed to register with return %d\n", ret);
		return ret;
	}

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
		put_device(dev_root);
		if (ret) {
			pr_err("sysfs attribute export failed with error %d.\n", ret);
			goto global_attr_free;
		}
	}

	return ret;

global_attr_free:
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_cppc_enable(false);
	return ret;
}
device_initcall(amd_pstate_init);
static int __init amd_pstate_param(char *str)
{
	size_t size;
	int mode_idx;

	if (!str)
		return -EINVAL;

	size = strlen(str);
	mode_idx = get_mode_idx_from_str(str, size);

	return amd_pstate_set_driver(mode_idx);
}

static int __init amd_prefcore_param(char *str)
{
	if (!strcmp(str, "disable"))
		amd_pstate_prefcore = false;

	return 0;
}

early_param("amd_pstate", amd_pstate_param);
early_param("amd_prefcore", amd_prefcore_param);
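/*
 * Corresponding kernel command line usage, matching the mode strings parsed
 * by amd_pstate_param() and the "disable" check in amd_prefcore_param():
 *
 *   amd_pstate=disable|passive|active|guided
 *   amd_prefcore=disable
 */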
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");