cpufreq/amd-pstate: Fix per-policy boost flag incorrect when fail
[pf-kernel.git] drivers/cpufreq/amd-pstate.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * amd-pstate.c - AMD Processor P-state Frequency Driver
 *
 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Author: Huang Rui <ray.huang@amd.com>
 *
 * AMD P-State introduces a new CPU performance scaling design for AMD
 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
 * feature, which works with the AMD SMU firmware to provide a finer grained
 * frequency control range. It replaces the legacy ACPI P-States control and
 * allows a flexible, low-latency interface for the Linux kernel to directly
 * communicate performance hints to the hardware.
 *
 * AMD P-State is supported on recent AMD Zen based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system, and there are two types of hardware
 * implementations: 1) Full MSR Solution and 2) Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/topology.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

#include "amd-pstate.h"
#include "amd-pstate-trace.h"

#define AMD_PSTATE_TRANSITION_LATENCY		20000
#define AMD_PSTATE_TRANSITION_DELAY		1000
#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY	600

#define AMD_CPPC_EPP_PERFORMANCE		0x00
#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
#define AMD_CPPC_EPP_POWERSAVE			0xFF

static const char * const amd_pstate_mode_string[] = {
	[AMD_PSTATE_UNDEFINED]   = "undefined",
	[AMD_PSTATE_DISABLE]     = "disable",
	[AMD_PSTATE_PASSIVE]     = "passive",
	[AMD_PSTATE_ACTIVE]      = "active",
	[AMD_PSTATE_GUIDED]      = "guided",
	NULL,
};

const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode)
{
	if (mode < 0 || mode >= AMD_PSTATE_MAX)
		return NULL;
	return amd_pstate_mode_string[mode];
}
EXPORT_SYMBOL_GPL(amd_pstate_get_mode_string);

struct quirk_entry {
	u32 nominal_freq;
	u32 lowest_freq;
};

static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
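
/*
 * Field layout of the per-CPU CPPC request value cached in cppc_req_cached
 * and programmed into MSR_AMD_CPPC_REQ, as implied by the masks below:
 * bits 7:0 hold max_perf, 15:8 min_perf, 23:16 desired_perf and 31:24 the
 * energy performance preference (EPP).
 */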
#define AMD_CPPC_MAX_PERF_MASK		GENMASK(7, 0)
#define AMD_CPPC_MIN_PERF_MASK		GENMASK(15, 8)
#define AMD_CPPC_DES_PERF_MASK		GENMASK(23, 16)
#define AMD_CPPC_EPP_PERF_MASK		GENMASK(31, 24)

/*
 * AMD Energy Preference Performance (EPP)
 * The EPP is used in the CCLK DPM controller to drive the frequency that a
 * core is going to operate during short periods of activity. EPP values will
 * be utilized for different OS profiles (balanced, performance, power
 * savings). The display strings corresponding to each EPP index are held in
 * energy_perf_strings[]:
 *	index		String
 *	-------------------------------------
 *	0		default
 *	1		performance
 *	2		balance_performance
 *	3		balance_power
 *	4		power
 */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT = 0,
	EPP_INDEX_PERFORMANCE,
	EPP_INDEX_BALANCE_PERFORMANCE,
	EPP_INDEX_BALANCE_POWERSAVE,
	EPP_INDEX_POWERSAVE,
};

static const char * const energy_perf_strings[] = {
	[EPP_INDEX_DEFAULT] = "default",
	[EPP_INDEX_PERFORMANCE] = "performance",
	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
	[EPP_INDEX_POWERSAVE] = "power",
	NULL
};

static unsigned int epp_values[] = {
	[EPP_INDEX_DEFAULT] = 0,
	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
};
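
/*
 * These strings are what userspace writes to a policy's
 * energy_performance_preference attribute, e.g. (path assumed to follow the
 * usual cpufreq sysfs layout):
 *   echo balance_power > /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference
 */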

typedef int (*cppc_mode_transition_fn)(int);

static struct quirk_entry quirk_amd_7k62 = {
	.nominal_freq = 2600,
	.lowest_freq = 550,
};

static struct quirk_entry quirk_amd_mts = {
	.nominal_freq = 3600,
	.lowest_freq = 550,
};

static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
	/*
	 * Match the broken BIOS for family 17h processors supporting CPPC V2:
	 * the BIOS lacks the nominal_freq and lowest_freq capability
	 * definitions in the ACPI tables.
	 */
	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
		quirks = dmi->driver_data;
		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
		return 1;
	}

	return 0;
}

static int __init dmi_matched_mts_bios_bug(const struct dmi_system_id *dmi)
{
	/*
	 * Match the broken BIOS for Ryzen 3000 series processors supporting
	 * CPPC V2: the BIOS lacks the nominal_freq and lowest_freq capability
	 * definitions in the ACPI tables.
	 */
	if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
		quirks = dmi->driver_data;
		pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
		return 1;
	}

	return 0;
}

static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
	{
		.callback = dmi_matched_7k62_bios_bug,
		.ident = "AMD EPYC 7K62",
		.matches = {
			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
			DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
		},
		.driver_data = &quirk_amd_7k62,
	},
	{
		.callback = dmi_matched_mts_bios_bug,
		.ident = "AMD Ryzen 3000",
		.matches = {
			DMI_MATCH(DMI_PRODUCT_NAME, "B450M MORTAR MAX (MS-7B89)"),
			DMI_MATCH(DMI_BIOS_RELEASE, "06/10/2020"),
			DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
		},
		.driver_data = &quirk_amd_mts,
	},
	{}
};
MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);

static inline int get_mode_idx_from_str(const char *str, size_t size)
{
	int i;

	for (i = 0; i < AMD_PSTATE_MAX; i++) {
		if (!strncmp(str, amd_pstate_mode_string[i], size))
			return i;
	}

	return -EINVAL;
}

static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);

static s16 msr_get_epp(struct amd_cpudata *cpudata)
{
	u64 value;
	int ret;

	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
	if (ret < 0) {
		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
		return ret;
	}

	return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value);
}

DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp);

static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_get_epp)(cpudata);
}
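
/*
 * Several low-level operations (get/set EPP, update perf, enable CPPC,
 * init perf) are routed through static calls. The msr_*() variants are the
 * defaults for the full MSR solution; amd_pstate_init() switches them to the
 * shmem_*() variants when X86_FEATURE_CPPC is not available and the shared
 * memory (ACPI CPPC) interface must be used instead.
 */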

static s16 shmem_get_epp(struct amd_cpudata *cpudata)
{
	u64 epp;
	int ret;

	ret = cppc_get_epp_perf(cpudata->cpu, &epp);
	if (ret < 0) {
		pr_debug("Could not retrieve energy perf value (%d)\n", ret);
		return ret;
	}

	return (s16)(epp & 0xff);
}

static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			   u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
	u64 value, prev;

	value = prev = READ_ONCE(cpudata->cppc_req_cached);

	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
		   AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

	if (value == prev)
		return 0;

	if (fast_switch) {
		wrmsrl(MSR_AMD_CPPC_REQ, value);
		return 0;
	} else {
		int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);

		if (ret)
			return ret;
	}

	WRITE_ONCE(cpudata->cppc_req_cached, value);
	WRITE_ONCE(cpudata->epp_cached, epp);

	return 0;
}

DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);

static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
					 u32 min_perf, u32 des_perf,
					 u32 max_perf, u32 epp,
					 bool fast_switch)
{
	return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
						   max_perf, epp, fast_switch);
}

static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	u64 value, prev;
	int ret;

	value = prev = READ_ONCE(cpudata->cppc_req_cached);
	value &= ~AMD_CPPC_EPP_PERF_MASK;
	value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);

	if (value == prev)
		return 0;

	ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
	if (ret) {
		pr_err("failed to set energy perf value (%d)\n", ret);
		return ret;
	}

	/* update both so that msr_update_perf() can effectively check */
	WRITE_ONCE(cpudata->epp_cached, epp);
	WRITE_ONCE(cpudata->cppc_req_cached, value);

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);

static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	return static_call(amd_pstate_set_epp)(cpudata, epp);
}

static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
	int ret;
	struct cppc_perf_ctrls perf_ctrls;

	if (epp == cpudata->epp_cached)
		return 0;

	perf_ctrls.energy_perf = epp;
	ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
	if (ret) {
		pr_debug("failed to set energy perf value (%d)\n", ret);
		return ret;
	}
	WRITE_ONCE(cpudata->epp_cached, epp);

	return ret;
}

static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
					    int pref_index)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int epp;

	if (!pref_index)
		epp = cpudata->epp_default;
	else
		epp = epp_values[pref_index];

	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
		pr_debug("EPP cannot be set under performance policy\n");
		return -EBUSY;
	}

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  epp,
					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
					  FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
					  policy->boost_enabled);
	}

	return amd_pstate_set_epp(cpudata, epp);
}

static inline int msr_cppc_enable(bool enable)
{
	int ret, cpu;
	unsigned long logical_proc_id_mask = 0;

	/*
	 * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared.
	 */
	if (!enable)
		return 0;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		unsigned long logical_id = topology_logical_package_id(cpu);

		if (test_bit(logical_id, &logical_proc_id_mask))
			continue;

		set_bit(logical_id, &logical_proc_id_mask);

		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
					 enable);
		if (ret)
			return ret;
	}

	cppc_enabled = enable;
	return 0;
}

static int shmem_cppc_enable(bool enable)
{
	int cpu, ret = 0;
	struct cppc_perf_ctrls perf_ctrls;

	if (enable == cppc_enabled)
		return 0;

	for_each_present_cpu(cpu) {
		ret = cppc_set_enable(cpu, enable);
		if (ret)
			return ret;

		/* Enable autonomous mode for EPP */
		if (cppc_state == AMD_PSTATE_ACTIVE) {
			/* Set desired perf as zero to allow EPP firmware control */
			perf_ctrls.desired_perf = 0;
			ret = cppc_set_perf(cpu, &perf_ctrls);
			if (ret)
				return ret;
		}
	}

	cppc_enabled = enable;
	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);

static inline int amd_pstate_cppc_enable(bool enable)
{
	return static_call(amd_pstate_cppc_enable)(enable);
}

static int msr_init_perf(struct amd_cpudata *cpudata)
{
	u64 cap1, numerator;

	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
				     &cap1);
	if (ret)
		return ret;

	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, numerator);
	WRITE_ONCE(cpudata->max_limit_perf, numerator);
	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
	return 0;
}
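
/*
 * Note that in both init paths highest_perf is taken from the boost ratio
 * numerator rather than from the advertised highest perf capability; the
 * hardware-reported highest perf is instead kept as prefcore_ranking and
 * used for preferred core (ITMT) priorities.
 */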

static int shmem_init_perf(struct amd_cpudata *cpudata)
{
	struct cppc_perf_caps cppc_perf;
	u64 numerator;

	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	ret = amd_get_boost_ratio_numerator(cpudata->cpu, &numerator);
	if (ret)
		return ret;

	WRITE_ONCE(cpudata->highest_perf, numerator);
	WRITE_ONCE(cpudata->max_limit_perf, numerator);
	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
		   cppc_perf.lowest_nonlinear_perf);
	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);

	if (cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
	if (ret) {
		pr_warn("failed to get auto_sel, ret: %d\n", ret);
		return 0;
	}

	ret = cppc_set_auto_sel(cpudata->cpu,
				(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);

	if (ret)
		pr_warn("failed to set auto_sel, ret: %d\n", ret);

	return ret;
}

DEFINE_STATIC_CALL(amd_pstate_init_perf, msr_init_perf);

static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
{
	return static_call(amd_pstate_init_perf)(cpudata);
}

static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
			     u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
	struct cppc_perf_ctrls perf_ctrls;

	if (cppc_state == AMD_PSTATE_ACTIVE) {
		int ret = shmem_set_epp(cpudata, epp);

		if (ret)
			return ret;
	}

	perf_ctrls.max_perf = max_perf;
	perf_ctrls.min_perf = min_perf;
	perf_ctrls.desired_perf = des_perf;

	return cppc_set_perf(cpudata->cpu, &perf_ctrls);
}

static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
{
	u64 aperf, mperf, tsc;
	unsigned long flags;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();

	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
		local_irq_restore(flags);
		return false;
	}

	local_irq_restore(flags);

	cpudata->cur.aperf = aperf;
	cpudata->cur.mperf = mperf;
	cpudata->cur.tsc = tsc;
	cpudata->cur.aperf -= cpudata->prev.aperf;
	cpudata->cur.mperf -= cpudata->prev.mperf;
	cpudata->cur.tsc -= cpudata->prev.tsc;

	cpudata->prev.aperf = aperf;
	cpudata->prev.mperf = mperf;
	cpudata->prev.tsc = tsc;

	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);

	return true;
}

static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
{
	unsigned long max_freq;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	max_freq = READ_ONCE(cpudata->max_limit_freq);
	policy->cur = div_u64(des_perf * max_freq, max_perf);

	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
		min_perf = des_perf;
		des_perf = 0;
	}

	/* limit the max perf when core performance boost feature is disabled */
	if (!cpudata->boost_supported)
		max_perf = min_t(unsigned long, nominal_perf, max_perf);

	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
				      cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
				      cpudata->cpu, fast_switch);
	}

	amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);

	cpufreq_cpu_put(policy);
}

static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
{
	/*
	 * Initialize lower frequency limit (i.e. policy->min) with
	 * lowest_nonlinear_frequency which is the most energy efficient
	 * frequency. Override the initial value set by cpufreq core and
	 * amd-pstate qos_requests.
	 */
	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
		struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
		struct amd_cpudata *cpudata;

		if (!policy)
			return -EINVAL;

		cpudata = policy->driver_data;
		policy_data->min = cpudata->lowest_nonlinear_freq;
		cpufreq_cpu_put(policy);
	}

	cpufreq_verify_within_cpu_limits(policy_data);
	pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);

	return 0;
}

static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
	u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_perf = READ_ONCE(cpudata->highest_perf);
	max_freq = READ_ONCE(cpudata->max_freq);
	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
	min_limit_perf = div_u64(policy->min * max_perf, max_freq);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);

	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
	WRITE_ONCE(cpudata->min_limit_freq, policy->min);

	return 0;
}
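
/*
 * The limits above scale the policy's frequency bounds into perf units:
 * limit_perf = policy_freq * highest_perf / max_freq. As an illustrative
 * (hypothetical) example, with highest_perf = 228 and max_freq = 4000000 kHz,
 * a scaling_max_freq of 2000000 kHz maps to max_limit_perf = 114.
 */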

static int amd_pstate_update_freq(struct cpufreq_policy *policy,
				  unsigned int target_freq, bool fast_switch)
{
	struct cpufreq_freqs freqs;
	struct amd_cpudata *cpudata = policy->driver_data;
	unsigned long max_perf, min_perf, des_perf, cap_perf;

	if (!cpudata->max_freq)
		return -ENODEV;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	min_perf = READ_ONCE(cpudata->lowest_perf);
	max_perf = cap_perf;

	freqs.old = policy->cur;
	freqs.new = target_freq;

	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
				     cpudata->max_freq);

	WARN_ON(fast_switch && !policy->fast_switch_enabled);
	/*
	 * If fast_switch is desired, then there aren't any registered
	 * transition notifiers. See comment for
	 * cpufreq_enable_fast_switch().
	 */
	if (!fast_switch)
		cpufreq_freq_transition_begin(policy, &freqs);

	amd_pstate_update(cpudata, min_perf, des_perf,
			  max_perf, fast_switch, policy->governor->flags);

	if (!fast_switch)
		cpufreq_freq_transition_end(policy, &freqs, false);

	return 0;
}

static int amd_pstate_target(struct cpufreq_policy *policy,
			     unsigned int target_freq,
			     unsigned int relation)
{
	return amd_pstate_update_freq(policy, target_freq, false);
}

static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
					   unsigned int target_freq)
{
	if (!amd_pstate_update_freq(policy, target_freq, true))
		return target_freq;
	return policy->cur;
}

static void amd_pstate_adjust_perf(unsigned int cpu,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	unsigned long max_perf, min_perf, des_perf,
		      cap_perf, lowest_nonlinear_perf;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
		amd_pstate_update_min_max_limit(policy);

	cap_perf = READ_ONCE(cpudata->highest_perf);
	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);

	des_perf = cap_perf;
	if (target_perf < capacity)
		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);

	min_perf = READ_ONCE(cpudata->lowest_perf);
	if (_min_perf < capacity)
		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);

	if (min_perf < lowest_nonlinear_perf)
		min_perf = lowest_nonlinear_perf;

	max_perf = cap_perf;
	if (max_perf < min_perf)
		max_perf = min_perf;

	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);

	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
			  policy->governor->flags);
	cpufreq_cpu_put(policy);
}

static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 nominal_freq, max_freq;
	int ret = 0;

	nominal_freq = READ_ONCE(cpudata->nominal_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	if (on)
		policy->cpuinfo.max_freq = max_freq;
	else if (policy->cpuinfo.max_freq > nominal_freq)
		policy->cpuinfo.max_freq = nominal_freq;

	policy->max = policy->cpuinfo.max_freq;

	if (cppc_state == AMD_PSTATE_PASSIVE) {
		ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq);
		if (ret < 0)
			pr_debug("Failed to update freq constraint: CPU%d\n", cpudata->cpu);
	}

	return ret < 0 ? ret : 0;
}
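
/*
 * When boost is turned off, the helper above clamps cpuinfo.max_freq to the
 * nominal frequency; in passive mode the new ceiling is also propagated
 * through the per-CPU max-frequency QoS request added at init time.
 */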

static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!cpudata->boost_supported) {
		pr_err("Boost mode is not supported by this processor or SBIOS\n");
		return -EOPNOTSUPP;
	}

	guard(mutex)(&amd_pstate_driver_lock);

	ret = amd_pstate_cpu_boost_update(policy, state);
	refresh_frequency_limits(policy);

	return ret;
}

static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
{
	u64 boost_val;
	int ret = -1;

	/*
	 * If the platform has no CPB support, or CPB has been disabled,
	 * initialize the driver's boost_enabled state to false. This is not
	 * an error; the cpufreq core handles it.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_CPB)) {
		pr_debug_once("Boost CPB capabilities not present in the processor\n");
		ret = 0;
		goto exit_err;
	}

	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
	if (ret) {
		pr_err_once("failed to read initial CPU boost state!\n");
		ret = -EIO;
		goto exit_err;
	}

	if (!(boost_val & MSR_K7_HWCR_CPB_DIS))
		cpudata->boost_supported = true;

	return 0;

exit_err:
	cpudata->boost_supported = false;
	return ret;
}

static void amd_perf_ctl_reset(unsigned int cpu)
{
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}

/*
 * Enabling amd-pstate preferred core support can't be done directly from
 * cpufreq callbacks due to locking, so queue the work for later.
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);

#define CPPC_MAX_PERF	U8_MAX

static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
{
	/* user disabled or not detected */
	if (!amd_pstate_prefcore)
		return;

	cpudata->hw_prefcore = true;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);

	schedule_work(&sched_prefcore_work);
}

static void amd_pstate_update_limits(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;
	u32 prev_high = 0, cur_high = 0;
	int ret;
	bool highest_perf_changed = false;

	if (!policy)
		return;

	cpudata = policy->driver_data;

	if (!amd_pstate_prefcore)
		return;

	guard(mutex)(&amd_pstate_driver_lock);

	ret = amd_get_highest_perf(cpu, &cur_high);
	if (ret)
		goto free_cpufreq_put;

	prev_high = READ_ONCE(cpudata->prefcore_ranking);
	highest_perf_changed = (prev_high != cur_high);
	if (highest_perf_changed) {
		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

		if (cur_high < CPPC_MAX_PERF)
			sched_set_itmt_core_prio((int)cur_high, cpu);
	}

free_cpufreq_put:
	cpufreq_cpu_put(policy);

	if (!highest_perf_changed)
		cpufreq_update_policy(cpu);
}

/*
 * Get the pstate transition delay time from the ACPI tables that firmware
 * set instead of using a hardcoded value directly.
 */
static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
{
	u32 transition_delay_ns;

	transition_delay_ns = cppc_get_transition_latency(cpu);
	if (transition_delay_ns == CPUFREQ_ETERNAL) {
		if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC))
			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
		else
			return AMD_PSTATE_TRANSITION_DELAY;
	}

	return transition_delay_ns / NSEC_PER_USEC;
}

/*
 * Get the pstate transition latency value from the ACPI tables that firmware
 * set instead of using a hardcoded value directly.
 */
static u32 amd_pstate_get_transition_latency(unsigned int cpu)
{
	u32 transition_latency;

	transition_latency = cppc_get_transition_latency(cpu);
	if (transition_latency == CPUFREQ_ETERNAL)
		return AMD_PSTATE_TRANSITION_LATENCY;

	return transition_latency;
}

/*
 * amd_pstate_init_freq: Initialize the max_freq, min_freq,
 *                       nominal_freq and lowest_nonlinear_freq
 *                       for the @cpudata object.
 *
 * Requires: highest_perf, lowest_perf, nominal_perf and
 *           lowest_nonlinear_perf members of @cpudata to be
 *           initialized.
 *
 * Returns 0 on success, non-zero value on failure.
 */
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
	int ret;
	u32 min_freq, max_freq;
	u32 highest_perf, nominal_perf, nominal_freq;
	u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
	struct cppc_perf_caps cppc_perf;

	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
	if (ret)
		return ret;

	if (quirks && quirks->lowest_freq)
		min_freq = quirks->lowest_freq;
	else
		min_freq = cppc_perf.lowest_freq;

	if (quirks && quirks->nominal_freq)
		nominal_freq = quirks->nominal_freq;
	else
		nominal_freq = cppc_perf.nominal_freq;

	highest_perf = READ_ONCE(cpudata->highest_perf);
	nominal_perf = READ_ONCE(cpudata->nominal_perf);
	max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);

	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
	lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
	WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
	WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
	WRITE_ONCE(cpudata->max_freq, max_freq * 1000);

	/*
	 * Below values need to be initialized correctly, otherwise driver will fail to load
	 * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf
	 * lowest_nonlinear_freq is a value between [min_freq, nominal_freq]
	 * Check _CPC in ACPI table objects if any values are incorrect
	 */
	if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
		pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
		       min_freq, max_freq, nominal_freq);
		return -EINVAL;
	}

	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
		pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
		       lowest_nonlinear_freq, min_freq, nominal_freq);
		return -EINVAL;
	}

	return 0;
}
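
/*
 * To illustrate the scaling above with made-up numbers: if nominal_freq is
 * 2600 MHz at nominal_perf 166 and highest_perf is 228, then
 * max_freq = 228 * 2600 / 166 ~= 3571 MHz, stored as 3571000 kHz.
 */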

static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct device *dev;
	struct amd_cpudata *cpudata;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
	policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);

	policy->min = min_freq;
	policy->max = max_freq;

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	if (cpu_feature_enabled(X86_FEATURE_CPPC))
		policy->fast_switch_possible = true;

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
				   FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE);
	if (ret < 0) {
		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
		goto free_cpudata1;
	}

	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
	if (ret < 0) {
		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
		goto free_cpudata2;
	}

	cpudata->max_limit_freq = max_freq;
	cpudata->min_limit_freq = min_freq;

	policy->driver_data = cpudata;

	if (!current_pstate_driver->adjust_perf)
		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;

	return 0;

free_cpudata2:
	freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
	kfree(cpudata);
	return ret;
}

static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	freq_qos_remove_request(&cpudata->req[1]);
	freq_qos_remove_request(&cpudata->req[0]);
	policy->fast_switch_possible = false;
	kfree(cpudata);
}

static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret)
		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);

	return ret;
}

static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
{
	int ret;

	ret = amd_pstate_cppc_enable(false);
	if (ret)
		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);

	return ret;
}

/* Sysfs attributes */

/*
 * This frequency indicates the maximum hardware frequency.
 * If boost is not active but supported, the frequency will be larger than the
 * one in cpuinfo.
 */
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
					char *buf)
{
	int max_freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	max_freq = READ_ONCE(cpudata->max_freq);
	if (max_freq < 0)
		return max_freq;

	return sysfs_emit(buf, "%u\n", max_freq);
}

static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
						     char *buf)
{
	int freq;
	struct amd_cpudata *cpudata = policy->driver_data;

	freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
	if (freq < 0)
		return freq;

	return sysfs_emit(buf, "%u\n", freq);
}

/*
 * In some ASICs, the highest_perf is not the one in the _CPC table, so we
 * need to expose it to sysfs.
 */
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
					    char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->highest_perf);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
						char *buf)
{
	u32 perf;
	struct amd_cpudata *cpudata = policy->driver_data;

	perf = READ_ONCE(cpudata->prefcore_ranking);

	return sysfs_emit(buf, "%u\n", perf);
}

static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
					   char *buf)
{
	bool hw_prefcore;
	struct amd_cpudata *cpudata = policy->driver_data;

	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);

	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
}

static ssize_t show_energy_performance_available_preferences(
				struct cpufreq_policy *policy, char *buf)
{
	int i = 0;
	int offset = 0;
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sysfs_emit_at(buf, offset, "%s\n",
				     energy_perf_strings[EPP_INDEX_PERFORMANCE]);

	while (energy_perf_strings[i] != NULL)
		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);

	offset += sysfs_emit_at(buf, offset, "\n");

	return offset;
}

static ssize_t store_energy_performance_preference(
		struct cpufreq_policy *policy, const char *buf, size_t count)
{
	char str_preference[21];
	ssize_t ret;

	ret = sscanf(buf, "%20s", str_preference);
	if (ret != 1)
		return -EINVAL;

	ret = match_string(energy_perf_strings, -1, str_preference);
	if (ret < 0)
		return -EINVAL;

	guard(mutex)(&amd_pstate_limits_lock);

	ret = amd_pstate_set_energy_pref_index(policy, ret);

	return ret ? ret : count;
}

static ssize_t show_energy_performance_preference(
				struct cpufreq_policy *policy, char *buf)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int preference;

	switch (cpudata->epp_cached) {
	case AMD_CPPC_EPP_PERFORMANCE:
		preference = EPP_INDEX_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
		preference = EPP_INDEX_BALANCE_PERFORMANCE;
		break;
	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
		preference = EPP_INDEX_BALANCE_POWERSAVE;
		break;
	case AMD_CPPC_EPP_POWERSAVE:
		preference = EPP_INDEX_POWERSAVE;
		break;
	default:
		return -EINVAL;
	}

	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}

static void amd_pstate_driver_cleanup(void)
{
	amd_pstate_cppc_enable(false);
	cppc_state = AMD_PSTATE_DISABLE;
	current_pstate_driver = NULL;
}

static int amd_pstate_set_driver(int mode_idx)
{
	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
		cppc_state = mode_idx;
		if (cppc_state == AMD_PSTATE_DISABLE)
			pr_info("driver is explicitly disabled\n");

		if (cppc_state == AMD_PSTATE_ACTIVE)
			current_pstate_driver = &amd_pstate_epp_driver;

		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
			current_pstate_driver = &amd_pstate_driver;

		return 0;
	}

	return -EINVAL;
}

static int amd_pstate_register_driver(int mode)
{
	int ret;

	ret = amd_pstate_set_driver(mode);
	if (ret)
		return ret;

	cppc_state = mode;

	ret = amd_pstate_cppc_enable(true);
	if (ret) {
		pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
		       ret);
		amd_pstate_driver_cleanup();
		return ret;
	}

	/* at least one CPU supports CPB */
	current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);

	ret = cpufreq_register_driver(current_pstate_driver);
	if (ret) {
		amd_pstate_driver_cleanup();
		return ret;
	}

	return 0;
}

static int amd_pstate_unregister_driver(int dummy)
{
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_driver_cleanup();
	return 0;
}

static int amd_pstate_change_mode_without_dvr_change(int mode)
{
	int cpu = 0;

	cppc_state = mode;

	if (cpu_feature_enabled(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
		return 0;

	for_each_present_cpu(cpu) {
		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
	}

	return 0;
}

static int amd_pstate_change_driver_mode(int mode)
{
	int ret;

	ret = amd_pstate_unregister_driver(0);
	if (ret)
		return ret;

	ret = amd_pstate_register_driver(mode);
	if (ret)
		return ret;

	return 0;
}

static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
	[AMD_PSTATE_DISABLE]         = {
		[AMD_PSTATE_DISABLE]     = NULL,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_register_driver,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_register_driver,
		[AMD_PSTATE_GUIDED]      = amd_pstate_register_driver,
	},
	[AMD_PSTATE_PASSIVE]         = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = NULL,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]      = amd_pstate_change_mode_without_dvr_change,
	},
	[AMD_PSTATE_ACTIVE]          = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_driver_mode,
		[AMD_PSTATE_ACTIVE]      = NULL,
		[AMD_PSTATE_GUIDED]      = amd_pstate_change_driver_mode,
	},
	[AMD_PSTATE_GUIDED]          = {
		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_mode_without_dvr_change,
		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
		[AMD_PSTATE_GUIDED]      = NULL,
	},
};
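
/*
 * mode_state_machine is indexed as [current mode][requested mode]: switching
 * between passive and guided only flips the auto_sel setting, while any
 * transition into or out of active (EPP) mode re-registers the cpufreq driver.
 */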

static ssize_t amd_pstate_show_status(char *buf)
{
	if (!current_pstate_driver)
		return sysfs_emit(buf, "disable\n");

	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
}

int amd_pstate_update_status(const char *buf, size_t size)
{
	int mode_idx;

	if (size > strlen("passive") || size < strlen("active"))
		return -EINVAL;

	mode_idx = get_mode_idx_from_str(buf, size);

	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
		return -EINVAL;

	if (mode_state_machine[cppc_state][mode_idx])
		return mode_state_machine[cppc_state][mode_idx](mode_idx);

	return 0;
}
EXPORT_SYMBOL_GPL(amd_pstate_update_status);
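
/*
 * The mode can also be switched at runtime through the global status
 * attribute, e.g. (assuming the usual sysfs mount point):
 *   echo passive > /sys/devices/system/cpu/amd_pstate/status
 */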

static ssize_t status_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	guard(mutex)(&amd_pstate_driver_lock);

	return amd_pstate_show_status(buf);
}

static ssize_t status_store(struct device *a, struct device_attribute *b,
			    const char *buf, size_t count)
{
	char *p = memchr(buf, '\n', count);
	int ret;

	guard(mutex)(&amd_pstate_driver_lock);
	ret = amd_pstate_update_status(buf, p ? p - buf : count);

	return ret < 0 ? ret : count;
}

static ssize_t prefcore_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
}

cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);

cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);

static struct freq_attr *amd_pstate_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	NULL,
};

static struct freq_attr *amd_pstate_epp_attr[] = {
	&amd_pstate_max_freq,
	&amd_pstate_lowest_nonlinear_freq,
	&amd_pstate_highest_perf,
	&amd_pstate_prefcore_ranking,
	&amd_pstate_hw_prefcore,
	&energy_performance_preference,
	&energy_performance_available_preferences,
	NULL,
};

static struct attribute *pstate_global_attributes[] = {
	&dev_attr_status.attr,
	&dev_attr_prefcore.attr,
	NULL
};

static const struct attribute_group amd_pstate_global_attr_group = {
	.name = "amd_pstate",
	.attrs = pstate_global_attributes,
};

static bool amd_pstate_acpi_pm_profile_server(void)
{
	switch (acpi_gbl_FADT.preferred_profile) {
	case PM_ENTERPRISE_SERVER:
	case PM_SOHO_SERVER:
	case PM_PERFORMANCE_SERVER:
		return true;
	}
	return false;
}

static bool amd_pstate_acpi_pm_profile_undefined(void)
{
	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
		return true;
	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
		return true;
	return false;
}

static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
	int min_freq, max_freq, ret;
	struct amd_cpudata *cpudata;
	struct device *dev;
	u64 value;

	/*
	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
	 * which is ideal for initialization process.
	 */
	amd_perf_ctl_reset(policy->cpu);
	dev = get_cpu_device(policy->cpu);
	if (!dev)
		return -ENODEV;

	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
	if (!cpudata)
		return -ENOMEM;

	cpudata->cpu = policy->cpu;

	ret = amd_pstate_init_perf(cpudata);
	if (ret)
		goto free_cpudata1;

	amd_pstate_init_prefcore(cpudata);

	ret = amd_pstate_init_freq(cpudata);
	if (ret)
		goto free_cpudata1;

	ret = amd_pstate_init_boost_support(cpudata);
	if (ret)
		goto free_cpudata1;

	min_freq = READ_ONCE(cpudata->min_freq);
	max_freq = READ_ONCE(cpudata->max_freq);

	policy->cpuinfo.min_freq = min_freq;
	policy->cpuinfo.max_freq = max_freq;
	/* It will be updated by governor */
	policy->cur = policy->cpuinfo.min_freq;

	policy->driver_data = cpudata;

	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;

	policy->boost_enabled = READ_ONCE(cpudata->boost_supported);

	/*
	 * Set the policy to provide a valid fallback value in case
	 * the default cpufreq governor is neither powersave nor performance.
	 */
	if (amd_pstate_acpi_pm_profile_server() ||
	    amd_pstate_acpi_pm_profile_undefined()) {
		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
		cpudata->epp_default = amd_pstate_get_epp(cpudata);
	} else {
		policy->policy = CPUFREQ_POLICY_POWERSAVE;
		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
	}

	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_req_cached, value);

		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
		if (ret)
			return ret;
		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
	}
	ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
	if (ret)
		return ret;

	current_pstate_driver->adjust_perf = NULL;

	return 0;

free_cpudata1:
	kfree(cpudata);
	return ret;
}

static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata) {
		kfree(cpudata);
		policy->driver_data = NULL;
	}

	pr_debug("CPU %d exiting\n", policy->cpu);
}

static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u32 epp;

	amd_pstate_update_min_max_limit(policy);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
		epp = 0;
	else
		epp = READ_ONCE(cpudata->epp_cached);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
					  cpudata->min_limit_perf,
					  cpudata->max_limit_perf,
					  policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
				      cpudata->max_limit_perf, epp, false);
}

static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	if (!policy->cpuinfo.max_freq)
		return -ENODEV;

	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
		 policy->cpuinfo.max_freq, policy->max);

	cpudata->policy = policy->policy;

	ret = amd_pstate_epp_update_limit(policy);
	if (ret)
		return ret;

	/*
	 * policy->cur is never updated with the amd_pstate_epp driver, but it
	 * is used as a stale frequency value. So, keep it within limits.
	 */
	policy->cur = policy->min;

	return 0;
}

static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	u64 max_perf;
	int ret;

	ret = amd_pstate_cppc_enable(true);
	if (ret)
		pr_err("failed to enable amd pstate during resume, return %d\n", ret);

	max_perf = READ_ONCE(cpudata->highest_perf);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  cpudata->epp_cached,
					  FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
					  max_perf, policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
}

static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);

	ret = amd_pstate_epp_reenable(policy);
	if (ret)
		return ret;
	cpudata->suspended = false;

	return 0;
}

static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int min_perf;

	if (cpudata->suspended)
		return 0;

	min_perf = READ_ONCE(cpudata->lowest_perf);

	guard(mutex)(&amd_pstate_limits_lock);

	if (trace_amd_pstate_epp_perf_enabled()) {
		trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
					  AMD_CPPC_EPP_BALANCE_POWERSAVE,
					  min_perf, min_perf, policy->boost_enabled);
	}

	return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
				      AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}

static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;
	int ret;

	/* avoid suspending when EPP is not enabled */
	if (cppc_state != AMD_PSTATE_ACTIVE)
		return 0;

	/* set this flag to avoid setting core offline */
	cpudata->suspended = true;

	/* disable CPPC in lowlevel firmware */
	ret = amd_pstate_cppc_enable(false);
	if (ret)
		pr_err("failed to suspend, return %d\n", ret);

	return 0;
}

static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
{
	struct amd_cpudata *cpudata = policy->driver_data;

	if (cpudata->suspended) {
		guard(mutex)(&amd_pstate_limits_lock);

		/* enable amd pstate from suspend state */
		amd_pstate_epp_reenable(policy);

		cpudata->suspended = false;
	}

	return 0;
}

static struct cpufreq_driver amd_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
	.verify		= amd_pstate_verify,
	.target		= amd_pstate_target,
	.fast_switch	= amd_pstate_fast_switch,
	.init		= amd_pstate_cpu_init,
	.exit		= amd_pstate_cpu_exit,
	.suspend	= amd_pstate_cpu_suspend,
	.resume		= amd_pstate_cpu_resume,
	.set_boost	= amd_pstate_set_boost,
	.update_limits	= amd_pstate_update_limits,
	.name		= "amd-pstate",
	.attr		= amd_pstate_attr,
};

static struct cpufreq_driver amd_pstate_epp_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= amd_pstate_verify,
	.setpolicy	= amd_pstate_epp_set_policy,
	.init		= amd_pstate_epp_cpu_init,
	.exit		= amd_pstate_epp_cpu_exit,
	.offline	= amd_pstate_epp_cpu_offline,
	.online		= amd_pstate_epp_cpu_online,
	.suspend	= amd_pstate_epp_suspend,
	.resume		= amd_pstate_epp_resume,
	.update_limits	= amd_pstate_update_limits,
	.set_boost	= amd_pstate_set_boost,
	.name		= "amd-pstate-epp",
	.attr		= amd_pstate_epp_attr,
};

/*
 * CPPC is not supported for family ID 17H with model_ID ranging from 0x10 to 0x2F.
 * Print a debug message that helps check whether the CPU has CPPC support
 * when there is a loading issue.
 */
static bool amd_cppc_supported(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);
	bool warn = false;

	if ((boot_cpu_data.x86 == 0x17) && (boot_cpu_data.x86_model < 0x30)) {
		pr_debug_once("CPPC feature is not supported by the processor\n");
		return false;
	}

	/*
	 * If the CPPC feature is disabled in the BIOS for processors
	 * that support MSR-based CPPC, the AMD Pstate driver may not
	 * function correctly.
	 *
	 * For such processors, check the CPPC flag and display a
	 * warning message if the platform supports CPPC.
	 *
	 * Note: The check below will not abort the driver registration
	 * process because the code is added for debugging purposes.
	 * Besides, it may still be possible for the driver to work
	 * using the shared-memory mechanism.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_CPPC)) {
		if (cpu_feature_enabled(X86_FEATURE_ZEN2)) {
			switch (c->x86_model) {
			case 0x60 ... 0x6F:
			case 0x80 ... 0xAF:
				warn = true;
				break;
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN3) ||
			   cpu_feature_enabled(X86_FEATURE_ZEN4)) {
			switch (c->x86_model) {
			case 0x10 ... 0x1F:
			case 0x40 ... 0xAF:
				warn = true;
				break;
			}
		} else if (cpu_feature_enabled(X86_FEATURE_ZEN5)) {
			warn = true;
		}
	}

	if (warn)
		pr_warn_once("The CPPC feature is supported but currently disabled by the BIOS.\n"
			     "Please enable it if your BIOS has the CPPC option.\n");
	return true;
}

static int __init amd_pstate_init(void)
{
	struct device *dev_root;
	int ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return -ENODEV;

	/* show debug message only if CPPC is not supported */
	if (!amd_cppc_supported())
		return -EOPNOTSUPP;

	/* show warning message when BIOS broken or ACPI disabled */
	if (!acpi_cpc_valid()) {
		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
		return -ENODEV;
	}

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	quirks = NULL;

	/* check if this machine needs CPPC quirks */
	dmi_check_system(amd_pstate_quirks_table);

	/*
	 * Determine the driver mode from the command line or kernel config.
	 * If no command line input is provided, cppc_state will be AMD_PSTATE_UNDEFINED.
	 * Command line options will override the kernel config settings.
	 */
	if (cppc_state == AMD_PSTATE_UNDEFINED) {
		/* Disable on the following configs by default:
		 * 1. Undefined platforms
		 * 2. Server platforms with CPUs older than Family 0x1A.
		 */
		if (amd_pstate_acpi_pm_profile_undefined() ||
		    (amd_pstate_acpi_pm_profile_server() && boot_cpu_data.x86 < 0x1A)) {
			pr_info("driver load is disabled, boot with specific mode to enable this\n");
			return -ENODEV;
		}
		/* get driver mode from kernel config option [1:4] */
		cppc_state = CONFIG_X86_AMD_PSTATE_DEFAULT_MODE;
	}

	if (cppc_state == AMD_PSTATE_DISABLE) {
		pr_info("driver load is disabled, boot with specific mode to enable this\n");
		return -ENODEV;
	}

	/* capability check */
	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
		pr_debug("AMD CPPC MSR based functionality is supported\n");
	} else {
		pr_debug("AMD CPPC shared memory based functionality is supported\n");
		static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
		static_call_update(amd_pstate_init_perf, shmem_init_perf);
		static_call_update(amd_pstate_update_perf, shmem_update_perf);
		static_call_update(amd_pstate_get_epp, shmem_get_epp);
		static_call_update(amd_pstate_set_epp, shmem_set_epp);
	}

	if (amd_pstate_prefcore) {
		ret = amd_detect_prefcore(&amd_pstate_prefcore);
		if (ret)
			return ret;
	}

	ret = amd_pstate_register_driver(cppc_state);
	if (ret) {
		pr_err("failed to register with return %d\n", ret);
		return ret;
	}

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
		put_device(dev_root);
		if (ret) {
			pr_err("sysfs attribute export failed with error %d.\n", ret);
			goto global_attr_free;
		}
	}

	return ret;

global_attr_free:
	cpufreq_unregister_driver(current_pstate_driver);
	amd_pstate_cppc_enable(false);
	return ret;
}
device_initcall(amd_pstate_init);

static int __init amd_pstate_param(char *str)
{
	size_t size;
	int mode_idx;

	if (!str)
		return -EINVAL;

	size = strlen(str);
	mode_idx = get_mode_idx_from_str(str, size);

	return amd_pstate_set_driver(mode_idx);
}

static int __init amd_prefcore_param(char *str)
{
	if (!strcmp(str, "disable"))
		amd_pstate_prefcore = false;

	return 0;
}

early_param("amd_pstate", amd_pstate_param);
early_param("amd_prefcore", amd_prefcore_param);
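
/*
 * Example kernel command line usage: "amd_pstate=active" selects the EPP
 * driver at boot, "amd_pstate=passive" or "amd_pstate=guided" select the
 * non-EPP driver, and "amd_prefcore=disable" turns off preferred core
 * handling.
 */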

MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");