1 // SPDX-License-Identifier: GPL-2.0-only
3 * Common code for Intel Running Average Power Limit (RAPL) support.
4 * Copyright (c) 2019, Intel Corporation.
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/list.h>
11 #include <linux/types.h>
12 #include <linux/device.h>
13 #include <linux/slab.h>
14 #include <linux/log2.h>
15 #include <linux/bitmap.h>
16 #include <linux/delay.h>
17 #include <linux/sysfs.h>
18 #include <linux/cpu.h>
19 #include <linux/powercap.h>
20 #include <linux/suspend.h>
21 #include <linux/intel_rapl.h>
22 #include <linux/processor.h>
23 #include <linux/platform_device.h>
25 #include <asm/iosf_mbi.h>
26 #include <asm/cpu_device_id.h>
27 #include <asm/intel-family.h>
29 /* bitmasks for RAPL MSRs, used by primitive access functions */
30 #define ENERGY_STATUS_MASK 0xffffffff
32 #define POWER_LIMIT1_MASK 0x7FFF
33 #define POWER_LIMIT1_ENABLE BIT(15)
34 #define POWER_LIMIT1_CLAMP BIT(16)
36 #define POWER_LIMIT2_MASK (0x7FFFULL<<32)
37 #define POWER_LIMIT2_ENABLE BIT_ULL(47)
38 #define POWER_LIMIT2_CLAMP BIT_ULL(48)
39 #define POWER_HIGH_LOCK BIT_ULL(63)
40 #define POWER_LOW_LOCK BIT(31)
42 #define POWER_LIMIT4_MASK 0x1FFF
44 #define TIME_WINDOW1_MASK (0x7FULL<<17)
45 #define TIME_WINDOW2_MASK (0x7FULL<<49)
47 #define POWER_UNIT_OFFSET 0
48 #define POWER_UNIT_MASK 0x0F
50 #define ENERGY_UNIT_OFFSET 0x08
51 #define ENERGY_UNIT_MASK 0x1F00
53 #define TIME_UNIT_OFFSET 0x10
54 #define TIME_UNIT_MASK 0xF0000
56 #define POWER_INFO_MAX_MASK (0x7fffULL<<32)
57 #define POWER_INFO_MIN_MASK (0x7fffULL<<16)
58 #define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
59 #define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
61 #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
62 #define PP_POLICY_MASK 0x1F
64 /* Non HW constants */
65 #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
66 #define RAPL_PRIMITIVE_DUMMY BIT(2)
68 #define TIME_WINDOW_MAX_MSEC 40000
69 #define TIME_WINDOW_MIN_MSEC 250
70 #define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
72 ARBITRARY_UNIT
, /* no translation */
78 /* per domain data, some are optional */
79 #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
81 #define DOMAIN_STATE_INACTIVE BIT(0)
82 #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
83 #define DOMAIN_STATE_BIOS_LOCKED BIT(2)
85 static const char pl1_name
[] = "long_term";
86 static const char pl2_name
[] = "short_term";
87 static const char pl4_name
[] = "peak_power";
89 #define power_zone_to_rapl_domain(_zone) \
90 container_of(_zone, struct rapl_domain, power_zone)
92 struct rapl_defaults
{
93 u8 floor_freq_reg_addr
;
94 int (*check_unit
)(struct rapl_package
*rp
, int cpu
);
95 void (*set_floor_freq
)(struct rapl_domain
*rd
, bool mode
);
96 u64 (*compute_time_window
)(struct rapl_package
*rp
, u64 val
,
98 unsigned int dram_domain_energy_unit
;
99 unsigned int psys_domain_energy_unit
;
101 static struct rapl_defaults
*rapl_defaults
;
103 /* Sideband MBI registers */
104 #define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
105 #define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
107 #define PACKAGE_PLN_INT_SAVED BIT(0)
108 #define MAX_PRIM_NAME (32)
110 /* per domain data. used to describe individual knobs such that access function
111 * can be consolidated into one instead of many inline functions.
113 struct rapl_primitive_info
{
117 enum rapl_domain_reg_id id
;
122 #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
131 static void rapl_init_domains(struct rapl_package
*rp
);
132 static int rapl_read_data_raw(struct rapl_domain
*rd
,
133 enum rapl_primitives prim
,
134 bool xlate
, u64
*data
);
135 static int rapl_write_data_raw(struct rapl_domain
*rd
,
136 enum rapl_primitives prim
,
137 unsigned long long value
);
138 static u64
rapl_unit_xlate(struct rapl_domain
*rd
,
139 enum unit_type type
, u64 value
, int to_raw
);
140 static void package_power_limit_irq_save(struct rapl_package
*rp
);
142 static LIST_HEAD(rapl_packages
); /* guarded by CPU hotplug lock */
144 static const char *const rapl_domain_names
[] = {
152 static int get_energy_counter(struct powercap_zone
*power_zone
,
155 struct rapl_domain
*rd
;
158 /* prevent CPU hotplug, make sure the RAPL domain does not go
159 * away while reading the counter.
162 rd
= power_zone_to_rapl_domain(power_zone
);
164 if (!rapl_read_data_raw(rd
, ENERGY_COUNTER
, true, &energy_now
)) {
165 *energy_raw
= energy_now
;
175 static int get_max_energy_counter(struct powercap_zone
*pcd_dev
, u64
*energy
)
177 struct rapl_domain
*rd
= power_zone_to_rapl_domain(pcd_dev
);
179 *energy
= rapl_unit_xlate(rd
, ENERGY_UNIT
, ENERGY_STATUS_MASK
, 0);
183 static int release_zone(struct powercap_zone
*power_zone
)
185 struct rapl_domain
*rd
= power_zone_to_rapl_domain(power_zone
);
186 struct rapl_package
*rp
= rd
->rp
;
188 /* package zone is the last zone of a package, we can free
189 * memory here since all children have been unregistered.
191 if (rd
->id
== RAPL_DOMAIN_PACKAGE
) {
200 static int find_nr_power_limit(struct rapl_domain
*rd
)
204 for (i
= 0; i
< NR_POWER_LIMITS
; i
++) {
212 static int set_domain_enable(struct powercap_zone
*power_zone
, bool mode
)
214 struct rapl_domain
*rd
= power_zone_to_rapl_domain(power_zone
);
216 if (rd
->state
& DOMAIN_STATE_BIOS_LOCKED
)
220 rapl_write_data_raw(rd
, PL1_ENABLE
, mode
);
221 if (rapl_defaults
->set_floor_freq
)
222 rapl_defaults
->set_floor_freq(rd
, mode
);
228 static int get_domain_enable(struct powercap_zone
*power_zone
, bool *mode
)
230 struct rapl_domain
*rd
= power_zone_to_rapl_domain(power_zone
);
233 if (rd
->state
& DOMAIN_STATE_BIOS_LOCKED
) {
238 if (rapl_read_data_raw(rd
, PL1_ENABLE
, true, &val
)) {
248 /* per RAPL domain ops, in the order of rapl_domain_type */
249 static const struct powercap_zone_ops zone_ops
[] = {
250 /* RAPL_DOMAIN_PACKAGE */
252 .get_energy_uj
= get_energy_counter
,
253 .get_max_energy_range_uj
= get_max_energy_counter
,
254 .release
= release_zone
,
255 .set_enable
= set_domain_enable
,
256 .get_enable
= get_domain_enable
,
258 /* RAPL_DOMAIN_PP0 */
260 .get_energy_uj
= get_energy_counter
,
261 .get_max_energy_range_uj
= get_max_energy_counter
,
262 .release
= release_zone
,
263 .set_enable
= set_domain_enable
,
264 .get_enable
= get_domain_enable
,
266 /* RAPL_DOMAIN_PP1 */
268 .get_energy_uj
= get_energy_counter
,
269 .get_max_energy_range_uj
= get_max_energy_counter
,
270 .release
= release_zone
,
271 .set_enable
= set_domain_enable
,
272 .get_enable
= get_domain_enable
,
274 /* RAPL_DOMAIN_DRAM */
276 .get_energy_uj
= get_energy_counter
,
277 .get_max_energy_range_uj
= get_max_energy_counter
,
278 .release
= release_zone
,
279 .set_enable
= set_domain_enable
,
280 .get_enable
= get_domain_enable
,
282 /* RAPL_DOMAIN_PLATFORM */
284 .get_energy_uj
= get_energy_counter
,
285 .get_max_energy_range_uj
= get_max_energy_counter
,
286 .release
= release_zone
,
287 .set_enable
= set_domain_enable
,
288 .get_enable
= get_domain_enable
,
293 * Constraint index used by powercap can be different than power limit (PL)
294 * index in that some PLs may be missing due to non-existent MSRs. So we
295 * need to convert here by finding the valid PLs only (name populated).
297 static int contraint_to_pl(struct rapl_domain
*rd
, int cid
)
301 for (i
= 0, j
= 0; i
< NR_POWER_LIMITS
; i
++) {
302 if ((rd
->rpl
[i
].name
) && j
++ == cid
) {
303 pr_debug("%s: index %d\n", __func__
, i
);
307 pr_err("Cannot find matching power limit for constraint %d\n", cid
);
312 static int set_power_limit(struct powercap_zone
*power_zone
, int cid
,
315 struct rapl_domain
*rd
;
316 struct rapl_package
*rp
;
321 rd
= power_zone_to_rapl_domain(power_zone
);
322 id
= contraint_to_pl(rd
, cid
);
330 if (rd
->state
& DOMAIN_STATE_BIOS_LOCKED
) {
331 dev_warn(&power_zone
->dev
,
332 "%s locked by BIOS, monitoring only\n", rd
->name
);
337 switch (rd
->rpl
[id
].prim_id
) {
339 rapl_write_data_raw(rd
, POWER_LIMIT1
, power_limit
);
342 rapl_write_data_raw(rd
, POWER_LIMIT2
, power_limit
);
345 rapl_write_data_raw(rd
, POWER_LIMIT4
, power_limit
);
351 package_power_limit_irq_save(rp
);
357 static int get_current_power_limit(struct powercap_zone
*power_zone
, int cid
,
360 struct rapl_domain
*rd
;
367 rd
= power_zone_to_rapl_domain(power_zone
);
368 id
= contraint_to_pl(rd
, cid
);
374 switch (rd
->rpl
[id
].prim_id
) {
388 if (rapl_read_data_raw(rd
, prim
, true, &val
))
399 static int set_time_window(struct powercap_zone
*power_zone
, int cid
,
402 struct rapl_domain
*rd
;
407 rd
= power_zone_to_rapl_domain(power_zone
);
408 id
= contraint_to_pl(rd
, cid
);
414 switch (rd
->rpl
[id
].prim_id
) {
416 rapl_write_data_raw(rd
, TIME_WINDOW1
, window
);
419 rapl_write_data_raw(rd
, TIME_WINDOW2
, window
);
430 static int get_time_window(struct powercap_zone
*power_zone
, int cid
,
433 struct rapl_domain
*rd
;
439 rd
= power_zone_to_rapl_domain(power_zone
);
440 id
= contraint_to_pl(rd
, cid
);
446 switch (rd
->rpl
[id
].prim_id
) {
448 ret
= rapl_read_data_raw(rd
, TIME_WINDOW1
, true, &val
);
451 ret
= rapl_read_data_raw(rd
, TIME_WINDOW2
, true, &val
);
455 * Time window parameter is not applicable for PL4 entry
456 * so assigning '0' as default value.
473 static const char *get_constraint_name(struct powercap_zone
*power_zone
,
476 struct rapl_domain
*rd
;
479 rd
= power_zone_to_rapl_domain(power_zone
);
480 id
= contraint_to_pl(rd
, cid
);
482 return rd
->rpl
[id
].name
;
487 static int get_max_power(struct powercap_zone
*power_zone
, int id
, u64
*data
)
489 struct rapl_domain
*rd
;
495 rd
= power_zone_to_rapl_domain(power_zone
);
496 switch (rd
->rpl
[id
].prim_id
) {
498 prim
= THERMAL_SPEC_POWER
;
510 if (rapl_read_data_raw(rd
, prim
, true, &val
))
515 /* As a generalization rule, PL4 would be around two times PL2. */
516 if (rd
->rpl
[id
].prim_id
== PL4_ENABLE
)
524 static const struct powercap_zone_constraint_ops constraint_ops
= {
525 .set_power_limit_uw
= set_power_limit
,
526 .get_power_limit_uw
= get_current_power_limit
,
527 .set_time_window_us
= set_time_window
,
528 .get_time_window_us
= get_time_window
,
529 .get_max_power_uw
= get_max_power
,
530 .get_name
= get_constraint_name
,
533 /* called after domain detection and package level data are set */
534 static void rapl_init_domains(struct rapl_package
*rp
)
536 enum rapl_domain_type i
;
537 enum rapl_domain_reg_id j
;
538 struct rapl_domain
*rd
= rp
->domains
;
540 for (i
= 0; i
< RAPL_DOMAIN_MAX
; i
++) {
541 unsigned int mask
= rp
->domain_map
& (1 << i
);
548 if (i
== RAPL_DOMAIN_PLATFORM
&& rp
->id
> 0) {
549 snprintf(rd
->name
, RAPL_DOMAIN_NAME_LENGTH
, "psys-%d",
550 cpu_data(rp
->lead_cpu
).phys_proc_id
);
552 snprintf(rd
->name
, RAPL_DOMAIN_NAME_LENGTH
, "%s",
553 rapl_domain_names
[i
]);
556 rd
->rpl
[0].prim_id
= PL1_ENABLE
;
557 rd
->rpl
[0].name
= pl1_name
;
560 * The PL2 power domain is applicable for limits two
563 if (rp
->priv
->limits
[i
] >= 2) {
564 rd
->rpl
[1].prim_id
= PL2_ENABLE
;
565 rd
->rpl
[1].name
= pl2_name
;
568 /* Enable PL4 domain if the total power limits are three */
569 if (rp
->priv
->limits
[i
] == 3) {
570 rd
->rpl
[2].prim_id
= PL4_ENABLE
;
571 rd
->rpl
[2].name
= pl4_name
;
574 for (j
= 0; j
< RAPL_DOMAIN_REG_MAX
; j
++)
575 rd
->regs
[j
] = rp
->priv
->regs
[i
][j
];
578 case RAPL_DOMAIN_DRAM
:
579 rd
->domain_energy_unit
=
580 rapl_defaults
->dram_domain_energy_unit
;
581 if (rd
->domain_energy_unit
)
582 pr_info("DRAM domain energy unit %dpj\n",
583 rd
->domain_energy_unit
);
585 case RAPL_DOMAIN_PLATFORM
:
586 rd
->domain_energy_unit
=
587 rapl_defaults
->psys_domain_energy_unit
;
588 if (rd
->domain_energy_unit
)
589 pr_info("Platform domain energy unit %dpj\n",
590 rd
->domain_energy_unit
);
599 static u64
rapl_unit_xlate(struct rapl_domain
*rd
, enum unit_type type
,
600 u64 value
, int to_raw
)
603 struct rapl_package
*rp
= rd
->rp
;
608 units
= rp
->power_unit
;
611 scale
= ENERGY_UNIT_SCALE
;
612 /* per domain unit takes precedence */
613 if (rd
->domain_energy_unit
)
614 units
= rd
->domain_energy_unit
;
616 units
= rp
->energy_unit
;
619 return rapl_defaults
->compute_time_window(rp
, value
, to_raw
);
626 return div64_u64(value
, units
) * scale
;
630 return div64_u64(value
, scale
);
633 /* in the order of enum rapl_primitives */
634 static struct rapl_primitive_info rpi
[] = {
635 /* name, mask, shift, msr index, unit divisor */
636 PRIMITIVE_INFO_INIT(ENERGY_COUNTER
, ENERGY_STATUS_MASK
, 0,
637 RAPL_DOMAIN_REG_STATUS
, ENERGY_UNIT
, 0),
638 PRIMITIVE_INFO_INIT(POWER_LIMIT1
, POWER_LIMIT1_MASK
, 0,
639 RAPL_DOMAIN_REG_LIMIT
, POWER_UNIT
, 0),
640 PRIMITIVE_INFO_INIT(POWER_LIMIT2
, POWER_LIMIT2_MASK
, 32,
641 RAPL_DOMAIN_REG_LIMIT
, POWER_UNIT
, 0),
642 PRIMITIVE_INFO_INIT(POWER_LIMIT4
, POWER_LIMIT4_MASK
, 0,
643 RAPL_DOMAIN_REG_PL4
, POWER_UNIT
, 0),
644 PRIMITIVE_INFO_INIT(FW_LOCK
, POWER_LOW_LOCK
, 31,
645 RAPL_DOMAIN_REG_LIMIT
, ARBITRARY_UNIT
, 0),
646 PRIMITIVE_INFO_INIT(PL1_ENABLE
, POWER_LIMIT1_ENABLE
, 15,
647 RAPL_DOMAIN_REG_LIMIT
, ARBITRARY_UNIT
, 0),
648 PRIMITIVE_INFO_INIT(PL1_CLAMP
, POWER_LIMIT1_CLAMP
, 16,
649 RAPL_DOMAIN_REG_LIMIT
, ARBITRARY_UNIT
, 0),
650 PRIMITIVE_INFO_INIT(PL2_ENABLE
, POWER_LIMIT2_ENABLE
, 47,
651 RAPL_DOMAIN_REG_LIMIT
, ARBITRARY_UNIT
, 0),
652 PRIMITIVE_INFO_INIT(PL2_CLAMP
, POWER_LIMIT2_CLAMP
, 48,
653 RAPL_DOMAIN_REG_LIMIT
, ARBITRARY_UNIT
, 0),
654 PRIMITIVE_INFO_INIT(PL4_ENABLE
, POWER_LIMIT4_MASK
, 0,
655 RAPL_DOMAIN_REG_PL4
, ARBITRARY_UNIT
, 0),
656 PRIMITIVE_INFO_INIT(TIME_WINDOW1
, TIME_WINDOW1_MASK
, 17,
657 RAPL_DOMAIN_REG_LIMIT
, TIME_UNIT
, 0),
658 PRIMITIVE_INFO_INIT(TIME_WINDOW2
, TIME_WINDOW2_MASK
, 49,
659 RAPL_DOMAIN_REG_LIMIT
, TIME_UNIT
, 0),
660 PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER
, POWER_INFO_THERMAL_SPEC_MASK
,
661 0, RAPL_DOMAIN_REG_INFO
, POWER_UNIT
, 0),
662 PRIMITIVE_INFO_INIT(MAX_POWER
, POWER_INFO_MAX_MASK
, 32,
663 RAPL_DOMAIN_REG_INFO
, POWER_UNIT
, 0),
664 PRIMITIVE_INFO_INIT(MIN_POWER
, POWER_INFO_MIN_MASK
, 16,
665 RAPL_DOMAIN_REG_INFO
, POWER_UNIT
, 0),
666 PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW
, POWER_INFO_MAX_TIME_WIN_MASK
, 48,
667 RAPL_DOMAIN_REG_INFO
, TIME_UNIT
, 0),
668 PRIMITIVE_INFO_INIT(THROTTLED_TIME
, PERF_STATUS_THROTTLE_TIME_MASK
, 0,
669 RAPL_DOMAIN_REG_PERF
, TIME_UNIT
, 0),
670 PRIMITIVE_INFO_INIT(PRIORITY_LEVEL
, PP_POLICY_MASK
, 0,
671 RAPL_DOMAIN_REG_POLICY
, ARBITRARY_UNIT
, 0),
673 PRIMITIVE_INFO_INIT(AVERAGE_POWER
, 0, 0, 0, POWER_UNIT
,
674 RAPL_PRIMITIVE_DERIVED
),
678 /* Read primitive data based on its related struct rapl_primitive_info.
679 * if xlate flag is set, return translated data based on data units, i.e.
680 * time, energy, and power.
681 * RAPL MSRs are non-architectural and are not laid out consistently across
682 * domains. Here we use primitive info to allow writing consolidated access
684 * For a given primitive, it is processed by MSR mask and shift. Unit conversion
685 * is pre-assigned based on RAPL unit MSRs read at init time.
686 * 63-------------------------- 31--------------------------- 0
688 * | |<- shift ----------------|
689 * 63-------------------------- 31--------------------------- 0
691 static int rapl_read_data_raw(struct rapl_domain
*rd
,
692 enum rapl_primitives prim
, bool xlate
, u64
*data
)
695 struct rapl_primitive_info
*rp
= &rpi
[prim
];
696 struct reg_action ra
;
699 if (!rp
->name
|| rp
->flag
& RAPL_PRIMITIVE_DUMMY
)
702 ra
.reg
= rd
->regs
[rp
->id
];
706 cpu
= rd
->rp
->lead_cpu
;
708 /* domain with 2 limits has different bit */
709 if (prim
== FW_LOCK
&& rd
->rp
->priv
->limits
[rd
->id
] == 2) {
710 rp
->mask
= POWER_HIGH_LOCK
;
713 /* non-hardware data are collected by the polling thread */
714 if (rp
->flag
& RAPL_PRIMITIVE_DERIVED
) {
715 *data
= rd
->rdd
.primitives
[prim
];
721 if (rd
->rp
->priv
->read_raw(cpu
, &ra
)) {
722 pr_debug("failed to read reg 0x%llx on cpu %d\n", ra
.reg
, cpu
);
726 value
= ra
.value
>> rp
->shift
;
729 *data
= rapl_unit_xlate(rd
, rp
->unit
, value
, 0);
736 /* Similar use of primitive info in the read counterpart */
737 static int rapl_write_data_raw(struct rapl_domain
*rd
,
738 enum rapl_primitives prim
,
739 unsigned long long value
)
741 struct rapl_primitive_info
*rp
= &rpi
[prim
];
744 struct reg_action ra
;
747 cpu
= rd
->rp
->lead_cpu
;
748 bits
= rapl_unit_xlate(rd
, rp
->unit
, value
, 1);
752 memset(&ra
, 0, sizeof(ra
));
754 ra
.reg
= rd
->regs
[rp
->id
];
758 ret
= rd
->rp
->priv
->write_raw(cpu
, &ra
);
764 * Raw RAPL data stored in MSRs are in certain scales. We need to
765 * convert them into standard units based on the units reported in
766 * the RAPL unit MSRs. This is specific to CPUs as the method to
767 * calculate units differ on different CPUs.
768 * We convert the units to below format based on CPUs.
770 * energy unit: picoJoules : Represented in picoJoules by default
771 * power unit : microWatts : Represented in milliWatts by default
772 * time unit : microseconds: Represented in seconds by default
774 static int rapl_check_unit_core(struct rapl_package
*rp
, int cpu
)
776 struct reg_action ra
;
779 ra
.reg
= rp
->priv
->reg_unit
;
781 if (rp
->priv
->read_raw(cpu
, &ra
)) {
782 pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
783 rp
->priv
->reg_unit
, cpu
);
787 value
= (ra
.value
& ENERGY_UNIT_MASK
) >> ENERGY_UNIT_OFFSET
;
788 rp
->energy_unit
= ENERGY_UNIT_SCALE
* 1000000 / (1 << value
);
790 value
= (ra
.value
& POWER_UNIT_MASK
) >> POWER_UNIT_OFFSET
;
791 rp
->power_unit
= 1000000 / (1 << value
);
793 value
= (ra
.value
& TIME_UNIT_MASK
) >> TIME_UNIT_OFFSET
;
794 rp
->time_unit
= 1000000 / (1 << value
);
796 pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
797 rp
->name
, rp
->energy_unit
, rp
->time_unit
, rp
->power_unit
);
802 static int rapl_check_unit_atom(struct rapl_package
*rp
, int cpu
)
804 struct reg_action ra
;
807 ra
.reg
= rp
->priv
->reg_unit
;
809 if (rp
->priv
->read_raw(cpu
, &ra
)) {
810 pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
811 rp
->priv
->reg_unit
, cpu
);
815 value
= (ra
.value
& ENERGY_UNIT_MASK
) >> ENERGY_UNIT_OFFSET
;
816 rp
->energy_unit
= ENERGY_UNIT_SCALE
* 1 << value
;
818 value
= (ra
.value
& POWER_UNIT_MASK
) >> POWER_UNIT_OFFSET
;
819 rp
->power_unit
= (1 << value
) * 1000;
821 value
= (ra
.value
& TIME_UNIT_MASK
) >> TIME_UNIT_OFFSET
;
822 rp
->time_unit
= 1000000 / (1 << value
);
824 pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
825 rp
->name
, rp
->energy_unit
, rp
->time_unit
, rp
->power_unit
);
830 static void power_limit_irq_save_cpu(void *info
)
833 struct rapl_package
*rp
= (struct rapl_package
*)info
;
835 /* save the state of PLN irq mask bit before disabling it */
836 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT
, &l
, &h
);
837 if (!(rp
->power_limit_irq
& PACKAGE_PLN_INT_SAVED
)) {
838 rp
->power_limit_irq
= l
& PACKAGE_THERM_INT_PLN_ENABLE
;
839 rp
->power_limit_irq
|= PACKAGE_PLN_INT_SAVED
;
841 l
&= ~PACKAGE_THERM_INT_PLN_ENABLE
;
842 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT
, l
, h
);
846 * When package power limit is set artificially low by RAPL, LVT
847 * thermal interrupt for package power limit should be ignored
848 * since we are not really exceeding the real limit. The intention
849 * is to avoid excessive interrupts while we are trying to save power.
850 * A useful feature might be routing the package_power_limit interrupt
851 * to userspace via eventfd. once we have a usecase, this is simple
852 * to do by adding an atomic notifier.
855 static void package_power_limit_irq_save(struct rapl_package
*rp
)
857 if (!boot_cpu_has(X86_FEATURE_PTS
) || !boot_cpu_has(X86_FEATURE_PLN
))
860 smp_call_function_single(rp
->lead_cpu
, power_limit_irq_save_cpu
, rp
, 1);
864 * Restore per package power limit interrupt enable state. Called from cpu
865 * hotplug code on package removal.
867 static void package_power_limit_irq_restore(struct rapl_package
*rp
)
871 if (!boot_cpu_has(X86_FEATURE_PTS
) || !boot_cpu_has(X86_FEATURE_PLN
))
874 /* irq enable state not saved, nothing to restore */
875 if (!(rp
->power_limit_irq
& PACKAGE_PLN_INT_SAVED
))
878 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT
, &l
, &h
);
880 if (rp
->power_limit_irq
& PACKAGE_THERM_INT_PLN_ENABLE
)
881 l
|= PACKAGE_THERM_INT_PLN_ENABLE
;
883 l
&= ~PACKAGE_THERM_INT_PLN_ENABLE
;
885 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT
, l
, h
);
888 static void set_floor_freq_default(struct rapl_domain
*rd
, bool mode
)
890 int nr_powerlimit
= find_nr_power_limit(rd
);
892 /* always enable clamp such that p-state can go below OS requested
893 * range. Power capping takes priority over guaranteed frequency.
895 rapl_write_data_raw(rd
, PL1_CLAMP
, mode
);
897 /* some domains have pl2 */
898 if (nr_powerlimit
> 1) {
899 rapl_write_data_raw(rd
, PL2_ENABLE
, mode
);
900 rapl_write_data_raw(rd
, PL2_CLAMP
, mode
);
904 static void set_floor_freq_atom(struct rapl_domain
*rd
, bool enable
)
906 static u32 power_ctrl_orig_val
;
909 if (!rapl_defaults
->floor_freq_reg_addr
) {
910 pr_err("Invalid floor frequency config register\n");
914 if (!power_ctrl_orig_val
)
915 iosf_mbi_read(BT_MBI_UNIT_PMC
, MBI_CR_READ
,
916 rapl_defaults
->floor_freq_reg_addr
,
917 &power_ctrl_orig_val
);
918 mdata
= power_ctrl_orig_val
;
920 mdata
&= ~(0x7f << 8);
923 iosf_mbi_write(BT_MBI_UNIT_PMC
, MBI_CR_WRITE
,
924 rapl_defaults
->floor_freq_reg_addr
, mdata
);
927 static u64
rapl_compute_time_window_core(struct rapl_package
*rp
, u64 value
,
930 u64 f
, y
; /* fraction and exp. used for time unit */
933 * Special processing based on 2^Y*(1+F/4), refer
934 * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
937 f
= (value
& 0x60) >> 5;
939 value
= (1 << y
) * (4 + f
) * rp
->time_unit
/ 4;
941 do_div(value
, rp
->time_unit
);
943 f
= div64_u64(4 * (value
- (1 << y
)), 1 << y
);
944 value
= (y
& 0x1f) | ((f
& 0x3) << 5);
949 static u64
rapl_compute_time_window_atom(struct rapl_package
*rp
, u64 value
,
953 * Atom time unit encoding is straight forward val * time_unit,
954 * where time_unit is default to 1 sec. Never 0.
957 return (value
) ? value
*= rp
->time_unit
: rp
->time_unit
;
959 value
= div64_u64(value
, rp
->time_unit
);
964 static const struct rapl_defaults rapl_defaults_core
= {
965 .floor_freq_reg_addr
= 0,
966 .check_unit
= rapl_check_unit_core
,
967 .set_floor_freq
= set_floor_freq_default
,
968 .compute_time_window
= rapl_compute_time_window_core
,
971 static const struct rapl_defaults rapl_defaults_hsw_server
= {
972 .check_unit
= rapl_check_unit_core
,
973 .set_floor_freq
= set_floor_freq_default
,
974 .compute_time_window
= rapl_compute_time_window_core
,
975 .dram_domain_energy_unit
= 15300,
978 static const struct rapl_defaults rapl_defaults_spr_server
= {
979 .check_unit
= rapl_check_unit_core
,
980 .set_floor_freq
= set_floor_freq_default
,
981 .compute_time_window
= rapl_compute_time_window_core
,
982 .dram_domain_energy_unit
= 15300,
983 .psys_domain_energy_unit
= 1000000000,
986 static const struct rapl_defaults rapl_defaults_byt
= {
987 .floor_freq_reg_addr
= IOSF_CPU_POWER_BUDGET_CTL_BYT
,
988 .check_unit
= rapl_check_unit_atom
,
989 .set_floor_freq
= set_floor_freq_atom
,
990 .compute_time_window
= rapl_compute_time_window_atom
,
993 static const struct rapl_defaults rapl_defaults_tng
= {
994 .floor_freq_reg_addr
= IOSF_CPU_POWER_BUDGET_CTL_TNG
,
995 .check_unit
= rapl_check_unit_atom
,
996 .set_floor_freq
= set_floor_freq_atom
,
997 .compute_time_window
= rapl_compute_time_window_atom
,
1000 static const struct rapl_defaults rapl_defaults_ann
= {
1001 .floor_freq_reg_addr
= 0,
1002 .check_unit
= rapl_check_unit_atom
,
1003 .set_floor_freq
= NULL
,
1004 .compute_time_window
= rapl_compute_time_window_atom
,
1007 static const struct rapl_defaults rapl_defaults_cht
= {
1008 .floor_freq_reg_addr
= 0,
1009 .check_unit
= rapl_check_unit_atom
,
1010 .set_floor_freq
= NULL
,
1011 .compute_time_window
= rapl_compute_time_window_atom
,
1014 static const struct rapl_defaults rapl_defaults_amd
= {
1015 .check_unit
= rapl_check_unit_core
,
1018 static const struct x86_cpu_id rapl_ids
[] __initconst
= {
1019 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE
, &rapl_defaults_core
),
1020 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X
, &rapl_defaults_core
),
1022 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE
, &rapl_defaults_core
),
1023 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X
, &rapl_defaults_core
),
1025 X86_MATCH_INTEL_FAM6_MODEL(HASWELL
, &rapl_defaults_core
),
1026 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L
, &rapl_defaults_core
),
1027 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G
, &rapl_defaults_core
),
1028 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X
, &rapl_defaults_hsw_server
),
1030 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL
, &rapl_defaults_core
),
1031 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G
, &rapl_defaults_core
),
1032 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D
, &rapl_defaults_core
),
1033 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X
, &rapl_defaults_hsw_server
),
1035 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE
, &rapl_defaults_core
),
1036 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L
, &rapl_defaults_core
),
1037 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X
, &rapl_defaults_hsw_server
),
1038 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L
, &rapl_defaults_core
),
1039 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE
, &rapl_defaults_core
),
1040 X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L
, &rapl_defaults_core
),
1041 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L
, &rapl_defaults_core
),
1042 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE
, &rapl_defaults_core
),
1043 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI
, &rapl_defaults_core
),
1044 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X
, &rapl_defaults_hsw_server
),
1045 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D
, &rapl_defaults_hsw_server
),
1046 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L
, &rapl_defaults_core
),
1047 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE
, &rapl_defaults_core
),
1048 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L
, &rapl_defaults_core
),
1049 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE
, &rapl_defaults_core
),
1050 X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE
, &rapl_defaults_core
),
1051 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE
, &rapl_defaults_core
),
1052 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X
, &rapl_defaults_spr_server
),
1053 X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD
, &rapl_defaults_core
),
1055 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT
, &rapl_defaults_byt
),
1056 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT
, &rapl_defaults_cht
),
1057 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID
, &rapl_defaults_tng
),
1058 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID
, &rapl_defaults_ann
),
1059 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT
, &rapl_defaults_core
),
1060 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS
, &rapl_defaults_core
),
1061 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D
, &rapl_defaults_core
),
1062 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT
, &rapl_defaults_core
),
1063 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D
, &rapl_defaults_core
),
1064 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L
, &rapl_defaults_core
),
1066 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL
, &rapl_defaults_hsw_server
),
1067 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM
, &rapl_defaults_hsw_server
),
1069 X86_MATCH_VENDOR_FAM(AMD
, 0x17, &rapl_defaults_amd
),
1070 X86_MATCH_VENDOR_FAM(AMD
, 0x19, &rapl_defaults_amd
),
1073 MODULE_DEVICE_TABLE(x86cpu
, rapl_ids
);
1075 /* Read once for all raw primitive data for domains */
1076 static void rapl_update_domain_data(struct rapl_package
*rp
)
1081 for (dmn
= 0; dmn
< rp
->nr_domains
; dmn
++) {
1082 pr_debug("update %s domain %s data\n", rp
->name
,
1083 rp
->domains
[dmn
].name
);
1084 /* exclude non-raw primitives */
1085 for (prim
= 0; prim
< NR_RAW_PRIMITIVES
; prim
++) {
1086 if (!rapl_read_data_raw(&rp
->domains
[dmn
], prim
,
1087 rpi
[prim
].unit
, &val
))
1088 rp
->domains
[dmn
].rdd
.primitives
[prim
] = val
;
1094 static int rapl_package_register_powercap(struct rapl_package
*rp
)
1096 struct rapl_domain
*rd
;
1097 struct powercap_zone
*power_zone
= NULL
;
1100 /* Update the domain data of the new package */
1101 rapl_update_domain_data(rp
);
1103 /* first we register package domain as the parent zone */
1104 for (rd
= rp
->domains
; rd
< rp
->domains
+ rp
->nr_domains
; rd
++) {
1105 if (rd
->id
== RAPL_DOMAIN_PACKAGE
) {
1106 nr_pl
= find_nr_power_limit(rd
);
1107 pr_debug("register package domain %s\n", rp
->name
);
1108 power_zone
= powercap_register_zone(&rd
->power_zone
,
1109 rp
->priv
->control_type
, rp
->name
,
1110 NULL
, &zone_ops
[rd
->id
], nr_pl
,
1112 if (IS_ERR(power_zone
)) {
1113 pr_debug("failed to register power zone %s\n",
1115 return PTR_ERR(power_zone
);
1117 /* track parent zone in per package/socket data */
1118 rp
->power_zone
= power_zone
;
1119 /* done, only one package domain per socket */
1124 pr_err("no package domain found, unknown topology!\n");
1127 /* now register domains as children of the socket/package */
1128 for (rd
= rp
->domains
; rd
< rp
->domains
+ rp
->nr_domains
; rd
++) {
1129 struct powercap_zone
*parent
= rp
->power_zone
;
1131 if (rd
->id
== RAPL_DOMAIN_PACKAGE
)
1133 if (rd
->id
== RAPL_DOMAIN_PLATFORM
)
1135 /* number of power limits per domain varies */
1136 nr_pl
= find_nr_power_limit(rd
);
1137 power_zone
= powercap_register_zone(&rd
->power_zone
,
1138 rp
->priv
->control_type
,
1140 &zone_ops
[rd
->id
], nr_pl
,
1143 if (IS_ERR(power_zone
)) {
1144 pr_debug("failed to register power_zone, %s:%s\n",
1145 rp
->name
, rd
->name
);
1146 ret
= PTR_ERR(power_zone
);
1154 * Clean up previously initialized domains within the package if we
1155 * failed after the first domain setup.
1157 while (--rd
>= rp
->domains
) {
1158 pr_debug("unregister %s domain %s\n", rp
->name
, rd
->name
);
1159 powercap_unregister_zone(rp
->priv
->control_type
,
1166 static int rapl_check_domain(int cpu
, int domain
, struct rapl_package
*rp
)
1168 struct reg_action ra
;
1171 case RAPL_DOMAIN_PACKAGE
:
1172 case RAPL_DOMAIN_PP0
:
1173 case RAPL_DOMAIN_PP1
:
1174 case RAPL_DOMAIN_DRAM
:
1175 case RAPL_DOMAIN_PLATFORM
:
1176 ra
.reg
= rp
->priv
->regs
[domain
][RAPL_DOMAIN_REG_STATUS
];
1179 pr_err("invalid domain id %d\n", domain
);
1182 /* make sure domain counters are available and contain non-zero
1183 * values, otherwise skip it.
1186 ra
.mask
= ENERGY_STATUS_MASK
;
1187 if (rp
->priv
->read_raw(cpu
, &ra
) || !ra
.value
)
1194 * Check if power limits are available. Two cases when they are not available:
1195 * 1. Locked by BIOS, in this case we still provide read-only access so that
1196 * users can see what limit is set by the BIOS.
1197 * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not
1198 * exist at all. In this case, we do not show the constraints in powercap.
1200 * Called after domains are detected and initialized.
1202 static void rapl_detect_powerlimit(struct rapl_domain
*rd
)
1207 /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
1208 if (!rapl_read_data_raw(rd
, FW_LOCK
, false, &val64
)) {
1210 pr_info("RAPL %s domain %s locked by BIOS\n",
1211 rd
->rp
->name
, rd
->name
);
1212 rd
->state
|= DOMAIN_STATE_BIOS_LOCKED
;
1215 /* check if power limit MSR exists, otherwise domain is monitoring only */
1216 for (i
= 0; i
< NR_POWER_LIMITS
; i
++) {
1217 int prim
= rd
->rpl
[i
].prim_id
;
1219 if (rapl_read_data_raw(rd
, prim
, false, &val64
))
1220 rd
->rpl
[i
].name
= NULL
;
1224 /* Detect active and valid domains for the given CPU, caller must
1225 * ensure the CPU belongs to the targeted package and CPU hotplug is disabled.
1227 static int rapl_detect_domains(struct rapl_package
*rp
, int cpu
)
1229 struct rapl_domain
*rd
;
1232 for (i
= 0; i
< RAPL_DOMAIN_MAX
; i
++) {
1233 /* use physical package id to read counters */
1234 if (!rapl_check_domain(cpu
, i
, rp
)) {
1235 rp
->domain_map
|= 1 << i
;
1236 pr_info("Found RAPL domain %s\n", rapl_domain_names
[i
]);
1239 rp
->nr_domains
= bitmap_weight(&rp
->domain_map
, RAPL_DOMAIN_MAX
);
1240 if (!rp
->nr_domains
) {
1241 pr_debug("no valid rapl domains found in %s\n", rp
->name
);
1244 pr_debug("found %d domains on %s\n", rp
->nr_domains
, rp
->name
);
1246 rp
->domains
= kcalloc(rp
->nr_domains
+ 1, sizeof(struct rapl_domain
),
1251 rapl_init_domains(rp
);
1253 for (rd
= rp
->domains
; rd
< rp
->domains
+ rp
->nr_domains
; rd
++)
1254 rapl_detect_powerlimit(rd
);
1259 /* called from CPU hotplug notifier, hotplug lock held */
1260 void rapl_remove_package(struct rapl_package
*rp
)
1262 struct rapl_domain
*rd
, *rd_package
= NULL
;
1264 package_power_limit_irq_restore(rp
);
1266 for (rd
= rp
->domains
; rd
< rp
->domains
+ rp
->nr_domains
; rd
++) {
1267 rapl_write_data_raw(rd
, PL1_ENABLE
, 0);
1268 rapl_write_data_raw(rd
, PL1_CLAMP
, 0);
1269 if (find_nr_power_limit(rd
) > 1) {
1270 rapl_write_data_raw(rd
, PL2_ENABLE
, 0);
1271 rapl_write_data_raw(rd
, PL2_CLAMP
, 0);
1272 rapl_write_data_raw(rd
, PL4_ENABLE
, 0);
1274 if (rd
->id
== RAPL_DOMAIN_PACKAGE
) {
1278 pr_debug("remove package, undo power limit on %s: %s\n",
1279 rp
->name
, rd
->name
);
1280 powercap_unregister_zone(rp
->priv
->control_type
,
1283 /* do parent zone last */
1284 powercap_unregister_zone(rp
->priv
->control_type
,
1285 &rd_package
->power_zone
);
1286 list_del(&rp
->plist
);
1289 EXPORT_SYMBOL_GPL(rapl_remove_package
);
1291 /* caller to ensure CPU hotplug lock is held */
1292 struct rapl_package
*rapl_find_package_domain(int cpu
, struct rapl_if_priv
*priv
)
1294 int id
= topology_logical_die_id(cpu
);
1295 struct rapl_package
*rp
;
1297 list_for_each_entry(rp
, &rapl_packages
, plist
) {
1299 && rp
->priv
->control_type
== priv
->control_type
)
1305 EXPORT_SYMBOL_GPL(rapl_find_package_domain
);
1307 /* called from CPU hotplug notifier, hotplug lock held */
1308 struct rapl_package
*rapl_add_package(int cpu
, struct rapl_if_priv
*priv
)
1310 int id
= topology_logical_die_id(cpu
);
1311 struct rapl_package
*rp
;
1312 struct cpuinfo_x86
*c
= &cpu_data(cpu
);
1316 return ERR_PTR(-ENODEV
);
1318 rp
= kzalloc(sizeof(struct rapl_package
), GFP_KERNEL
);
1320 return ERR_PTR(-ENOMEM
);
1322 /* add the new package to the list */
1327 if (topology_max_die_per_package() > 1)
1328 snprintf(rp
->name
, PACKAGE_DOMAIN_NAME_LENGTH
,
1329 "package-%d-die-%d", c
->phys_proc_id
, c
->cpu_die_id
);
1331 snprintf(rp
->name
, PACKAGE_DOMAIN_NAME_LENGTH
, "package-%d",
1334 /* check if the package contains valid domains */
1335 if (rapl_detect_domains(rp
, cpu
) || rapl_defaults
->check_unit(rp
, cpu
)) {
1337 goto err_free_package
;
1339 ret
= rapl_package_register_powercap(rp
);
1341 INIT_LIST_HEAD(&rp
->plist
);
1342 list_add(&rp
->plist
, &rapl_packages
);
1349 return ERR_PTR(ret
);
1351 EXPORT_SYMBOL_GPL(rapl_add_package
);
1353 static void power_limit_state_save(void)
1355 struct rapl_package
*rp
;
1356 struct rapl_domain
*rd
;
1360 list_for_each_entry(rp
, &rapl_packages
, plist
) {
1361 if (!rp
->power_zone
)
1363 rd
= power_zone_to_rapl_domain(rp
->power_zone
);
1364 nr_pl
= find_nr_power_limit(rd
);
1365 for (i
= 0; i
< nr_pl
; i
++) {
1366 switch (rd
->rpl
[i
].prim_id
) {
1368 ret
= rapl_read_data_raw(rd
,
1370 &rd
->rpl
[i
].last_power_limit
);
1372 rd
->rpl
[i
].last_power_limit
= 0;
1375 ret
= rapl_read_data_raw(rd
,
1377 &rd
->rpl
[i
].last_power_limit
);
1379 rd
->rpl
[i
].last_power_limit
= 0;
1382 ret
= rapl_read_data_raw(rd
,
1384 &rd
->rpl
[i
].last_power_limit
);
1386 rd
->rpl
[i
].last_power_limit
= 0;
1394 static void power_limit_state_restore(void)
1396 struct rapl_package
*rp
;
1397 struct rapl_domain
*rd
;
1401 list_for_each_entry(rp
, &rapl_packages
, plist
) {
1402 if (!rp
->power_zone
)
1404 rd
= power_zone_to_rapl_domain(rp
->power_zone
);
1405 nr_pl
= find_nr_power_limit(rd
);
1406 for (i
= 0; i
< nr_pl
; i
++) {
1407 switch (rd
->rpl
[i
].prim_id
) {
1409 if (rd
->rpl
[i
].last_power_limit
)
1410 rapl_write_data_raw(rd
, POWER_LIMIT1
,
1411 rd
->rpl
[i
].last_power_limit
);
1414 if (rd
->rpl
[i
].last_power_limit
)
1415 rapl_write_data_raw(rd
, POWER_LIMIT2
,
1416 rd
->rpl
[i
].last_power_limit
);
1419 if (rd
->rpl
[i
].last_power_limit
)
1420 rapl_write_data_raw(rd
, POWER_LIMIT4
,
1421 rd
->rpl
[i
].last_power_limit
);
1429 static int rapl_pm_callback(struct notifier_block
*nb
,
1430 unsigned long mode
, void *_unused
)
1433 case PM_SUSPEND_PREPARE
:
1434 power_limit_state_save();
1436 case PM_POST_SUSPEND
:
1437 power_limit_state_restore();
1443 static struct notifier_block rapl_pm_notifier
= {
1444 .notifier_call
= rapl_pm_callback
,
1447 static struct platform_device
*rapl_msr_platdev
;
1449 static int __init
rapl_init(void)
1451 const struct x86_cpu_id
*id
;
1454 id
= x86_match_cpu(rapl_ids
);
1456 pr_err("driver does not support CPU family %d model %d\n",
1457 boot_cpu_data
.x86
, boot_cpu_data
.x86_model
);
1462 rapl_defaults
= (struct rapl_defaults
*)id
->driver_data
;
1464 ret
= register_pm_notifier(&rapl_pm_notifier
);
1468 rapl_msr_platdev
= platform_device_alloc("intel_rapl_msr", 0);
1469 if (!rapl_msr_platdev
) {
1474 ret
= platform_device_add(rapl_msr_platdev
);
1476 platform_device_put(rapl_msr_platdev
);
1480 unregister_pm_notifier(&rapl_pm_notifier
);
1485 static void __exit
rapl_exit(void)
1487 platform_device_unregister(rapl_msr_platdev
);
1488 unregister_pm_notifier(&rapl_pm_notifier
);
1491 fs_initcall(rapl_init
);
1492 module_exit(rapl_exit
);
1494 MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
1495 MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
1496 MODULE_LICENSE("GPL v2");