// SPDX-License-Identifier: GPL-2.0-only
/*
 * Hardware Feedback Interface Driver
 *
 * Copyright (c) 2021, Intel Corporation.
 *
 * Authors: Aubrey Li <aubrey.li@linux.intel.com>
 *          Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
 *
 * The Hardware Feedback Interface provides performance and energy efficiency
 * capability information for each CPU in the system. Depending on the
 * processor model, hardware may periodically update these capabilities as a
 * result of changes in the operating conditions (e.g., power limits or
 * thermal constraints). On other processor models, there is a single HFI
 * update at boot.
 *
 * This file provides functionality to process HFI updates and relay these
 * updates to userspace.
 */

#define pr_fmt(fmt)  "intel-hfi: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/math.h>
#include <linux/mutex.h>
#include <linux/percpu-defs.h>
#include <linux/printk.h>
#include <linux/processor.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/suspend.h>
#include <linux/string.h>
#include <linux/syscore_ops.h>
#include <linux/topology.h>
#include <linux/workqueue.h>

#include <asm/msr.h>

#include "intel_hfi.h"
#include "thermal_interrupt.h"

#include "../thermal_netlink.h"

/* Hardware Feedback Interface MSR configuration bits */
#define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)

/* CPUID detection and enumeration definitions for HFI */

#define CPUID_HFI_LEAF 6

union hfi_capabilities {
	struct {
		u8	performance:1;
		u8	energy_efficiency:1;
		u8	__reserved:6;
	} split;
	u8 bits;
};

union cpuid6_edx {
	struct {
		union hfi_capabilities	capabilities;
		u32			table_pages:4;
		u32			__reserved:4;
		s32			index:16;
	} split;
	u32 full;
};
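
/*
 * Note: union cpuid6_edx above mirrors CPUID leaf 6 EDX as enumerated on
 * HFI-capable processors: bits 0-7 carry the supported-capabilities bitmap,
 * the next four bits encode the table size in 4KB pages minus one, and the
 * upper bits hold the HFI table row index of the logical processor that
 * executes CPUID. hfi_parse_features() and init_hfi_cpu_index() rely on
 * exactly this split.
 */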

/**
 * struct hfi_cpu_data - HFI capabilities per CPU
 * @perf_cap:		Performance capability
 * @ee_cap:		Energy efficiency capability
 *
 * Capabilities of a logical processor in the HFI table. These capabilities are
 * unitless.
 */
struct hfi_cpu_data {
	u8	perf_cap;
	u8	ee_cap;
} __packed;

/**
 * struct hfi_hdr - Header of the HFI table
 * @perf_updated:	Hardware updated performance capabilities
 * @ee_updated:		Hardware updated energy efficiency capabilities
 *
 * Properties of the data in an HFI table.
 */
struct hfi_hdr {
	u8	perf_updated;
	u8	ee_updated;
} __packed;

/**
 * struct hfi_instance - Representation of an HFI instance (i.e., a table)
 * @local_table:	Base of the local copy of the HFI table
 * @timestamp:		Timestamp of the last update of the local table.
 *			Located at the base of the local table.
 * @hdr:		Base address of the header of the local table
 * @data:		Base address of the data of the local table
 * @cpus:		CPUs represented in this HFI table instance
 * @hw_table:		Pointer to the HFI table of this instance
 * @update_work:	Delayed work to process HFI updates
 * @table_lock:		Lock to protect accesses to the table of this instance
 * @event_lock:		Lock to process HFI interrupts
 *
 * A set of parameters to parse and navigate a specific HFI table.
 */
struct hfi_instance {
	union {
		void			*local_table;
		u64			*timestamp;
	};
	void			*hdr;
	void			*data;
	cpumask_var_t		cpus;
	void			*hw_table;
	struct delayed_work	update_work;
	raw_spinlock_t		table_lock;
	raw_spinlock_t		event_lock;
};

/**
 * struct hfi_features - Supported HFI features
 * @nr_table_pages:	Size of the HFI table in 4KB pages
 * @cpu_stride:		Stride size to locate the capability data of a logical
 *			processor within the table (i.e., row stride)
 * @hdr_size:		Size of the table header
 *
 * Parameters and supported features that are common to all HFI instances.
 */
struct hfi_features {
	size_t		nr_table_pages;
	unsigned int	cpu_stride;
	unsigned int	hdr_size;
};

/**
 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
 * @index:		Row of this CPU in its HFI table
 * @hfi_instance:	Attributes of the HFI table to which this CPU belongs
 *
 * Parameters to link a logical processor to an HFI table and a row within it.
 */
struct hfi_cpu_info {
	s16			index;
	struct hfi_instance	*hfi_instance;
};

static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };

static int max_hfi_instances;
static int hfi_clients_nr;
static struct hfi_instance *hfi_instances;

static struct hfi_features hfi_features;
static DEFINE_MUTEX(hfi_instance_lock);

static struct workqueue_struct *hfi_updates_wq;
#define HFI_UPDATE_DELAY_MS		100
#define HFI_THERMNL_CAPS_PER_EVENT	64
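
/*
 * Thermal netlink reports capabilities on a 0-1023 scale, while the HFI
 * table stores them as 8-bit values (0-255). get_hfi_caps() below therefore
 * shifts each capability left by two bits, mapping 0-255 onto 0-1020.
 */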

static void get_hfi_caps(struct hfi_instance *hfi_instance,
			 struct thermal_genl_cpu_caps *cpu_caps)
{
	int cpu, i = 0;

	raw_spin_lock_irq(&hfi_instance->table_lock);
	for_each_cpu(cpu, hfi_instance->cpus) {
		struct hfi_cpu_data *caps;
		s16 index;

		index = per_cpu(hfi_cpu_info, cpu).index;
		caps = hfi_instance->data + index * hfi_features.cpu_stride;
		cpu_caps[i].cpu = cpu;

		/*
		 * Scale performance and energy efficiency to
		 * the [0, 1023] interval that thermal netlink uses.
		 */
		cpu_caps[i].performance = caps->perf_cap << 2;
		cpu_caps[i].efficiency = caps->ee_cap << 2;

		++i;
	}
	raw_spin_unlock_irq(&hfi_instance->table_lock);
}
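
/*
 * Example of the chunking done in update_capabilities() below: with 150 CPUs
 * in an instance, two full events of HFI_THERMNL_CAPS_PER_EVENT (64)
 * capabilities are sent, followed by a final event carrying the remaining 22.
 */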

/*
 * Call update_capabilities() when there are changes in the HFI table.
 */
static void update_capabilities(struct hfi_instance *hfi_instance)
{
	struct thermal_genl_cpu_caps *cpu_caps;
	int i = 0, cpu_count;

	/* CPUs may come online/offline while processing an HFI update. */
	mutex_lock(&hfi_instance_lock);

	cpu_count = cpumask_weight(hfi_instance->cpus);

	/* No CPUs to report in this hfi_instance. */
	if (!cpu_count)
		goto out;

	cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
	if (!cpu_caps)
		goto out;

	get_hfi_caps(hfi_instance, cpu_caps);

	if (cpu_count < HFI_THERMNL_CAPS_PER_EVENT)
		goto last_cmd;

	/* Process complete chunks of HFI_THERMNL_CAPS_PER_EVENT capabilities. */
	for (i = 0;
	     (i + HFI_THERMNL_CAPS_PER_EVENT) <= cpu_count;
	     i += HFI_THERMNL_CAPS_PER_EVENT)
		thermal_genl_cpu_capability_event(HFI_THERMNL_CAPS_PER_EVENT,
						  &cpu_caps[i]);

	cpu_count = cpu_count - i;

last_cmd:
	/* Process the remaining capabilities if any. */
	if (cpu_count)
		thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);

	kfree(cpu_caps);
out:
	mutex_unlock(&hfi_instance_lock);
}
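
/*
 * The work handler runs HFI_UPDATE_DELAY_MS after an update is signaled.
 * queue_delayed_work() does not re-queue work that is still pending, so
 * bursts of HFI interrupts collapse into at most one netlink notification
 * per instance per delay window.
 */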

static void hfi_update_work_fn(struct work_struct *work)
{
	struct hfi_instance *hfi_instance;

	hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
				    update_work);

	update_capabilities(hfi_instance);
}

void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
{
	struct hfi_instance *hfi_instance;
	int cpu = smp_processor_id();
	struct hfi_cpu_info *info;
	u64 new_timestamp, msr, hfi;

	if (!pkg_therm_status_msr_val)
		return;

	info = &per_cpu(hfi_cpu_info, cpu);

	/*
	 * A CPU is linked to its HFI instance before the thermal vector in the
	 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
	 * when receiving an HFI event.
	 */
	hfi_instance = info->hfi_instance;
	if (unlikely(!hfi_instance)) {
		pr_debug("Received event on CPU %d but instance was null\n", cpu);
		return;
	}

	/*
	 * On most systems, all CPUs in the package receive a package-level
	 * thermal interrupt when there is an HFI update. It is sufficient to
	 * let a single CPU acknowledge the update and queue work to process
	 * it. The remaining CPUs can resume their work.
	 */
	if (!raw_spin_trylock(&hfi_instance->event_lock))
		return;

	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
	hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
	if (!hfi) {
		raw_spin_unlock(&hfi_instance->event_lock);
		return;
	}

	/*
	 * Ack duplicate update. Since there is an active HFI
	 * status from HW, it must be a new event, not a case
	 * where a lagging CPU entered the locked region.
	 */
	new_timestamp = *(u64 *)hfi_instance->hw_table;
	if (*hfi_instance->timestamp == new_timestamp) {
		thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
		raw_spin_unlock(&hfi_instance->event_lock);
		return;
	}

	raw_spin_lock(&hfi_instance->table_lock);

	/*
	 * Copy the updated table into our local copy. This includes the new
	 * timestamp.
	 */
	memcpy(hfi_instance->local_table, hfi_instance->hw_table,
	       hfi_features.nr_table_pages << PAGE_SHIFT);

	/*
	 * Let hardware know that we are done reading the HFI table and it is
	 * free to update it again.
	 */
	thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);

	raw_spin_unlock(&hfi_instance->table_lock);
	raw_spin_unlock(&hfi_instance->event_lock);

	queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
			   msecs_to_jiffies(HFI_UPDATE_DELAY_MS));
}
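
/*
 * CPUID leaf 6 EDX reports the HFI table row index of the logical processor
 * that executes CPUID. intel_hfi_online() runs on the CPU that is coming
 * online, so the read below yields the index of that very CPU.
 */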

static void init_hfi_cpu_index(struct hfi_cpu_info *info)
{
	union cpuid6_edx edx;

	/* Do not re-read @cpu's index if it has already been initialized. */
	if (info->index > -1)
		return;

	edx.full = cpuid_edx(CPUID_HFI_LEAF);
	info->index = edx.split.index;
}

/*
 * The format of the HFI table depends on the number of capabilities that the
 * hardware supports. Keep a data structure to navigate the table.
 */
static void init_hfi_instance(struct hfi_instance *hfi_instance)
{
	/* The HFI header is below the time-stamp. */
	hfi_instance->hdr = hfi_instance->local_table +
			    sizeof(*hfi_instance->timestamp);

	/* The HFI data starts below the header. */
	hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
}
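
/*
 * Resulting layout of the local table after init_hfi_instance():
 *
 *   byte 0             byte 8          byte 8 + hdr_size
 *   | timestamp (u64) | table header | row 0 | row 1 | ...
 *
 * Each row is hfi_features.cpu_stride bytes long and is selected by
 * hfi_cpu_info.index.
 */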

/* Caller must hold hfi_instance_lock. */
static void hfi_enable(void)
{
	u64 msr_val;

	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
}
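
/*
 * MSR_IA32_HW_FEEDBACK_PTR and MSR_IA32_HW_FEEDBACK_CONFIG have package
 * scope: programming them from any CPU in a package configures HFI for the
 * whole package. This is why intel_hfi_online() and hfi_thermal_notify()
 * only program one CPU per instance.
 */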

static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
{
	phys_addr_t hw_table_pa;
	u64 msr_val;

	hw_table_pa = virt_to_phys(hfi_instance->hw_table);
	msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
	wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
}

/* Caller must hold hfi_instance_lock. */
static void hfi_disable(void)
{
	u64 msr_val;
	int i;

	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
	msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);

	/*
	 * Wait for hardware to acknowledge the disabling of HFI. Some
	 * processors may not do it. Wait for ~2ms. This is a reasonable
	 * time for hardware to complete any pending actions on the HFI
	 * notification log.
	 */
	for (i = 0; i < 2000; i++) {
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
		if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED)
			break;

		udelay(1);
		cpu_relax();
	}
}

/**
 * intel_hfi_online() - Enable HFI on @cpu
 * @cpu:	CPU in which the HFI will be enabled
 *
 * Enable the HFI to be used in @cpu. The HFI is enabled at the package
 * level. The first CPU in the package to come online does the full HFI
 * initialization. Subsequent CPUs will just link themselves to the HFI
 * instance of their package.
 *
 * This function is called before enabling the thermal vector in the local APIC
 * in order to ensure that @cpu has an associated HFI instance when it receives
 * an HFI event.
 */
void intel_hfi_online(unsigned int cpu)
{
	struct hfi_instance *hfi_instance;
	struct hfi_cpu_info *info;
	u16 pkg_id;

	/* Nothing to do if hfi_instances are missing. */
	if (!hfi_instances)
		return;

	/*
	 * Link @cpu to the HFI instance of its package. It does not
	 * matter whether the instance has been initialized.
	 */
	info = &per_cpu(hfi_cpu_info, cpu);
	pkg_id = topology_logical_package_id(cpu);
	hfi_instance = info->hfi_instance;
	if (!hfi_instance) {
		if (pkg_id >= max_hfi_instances)
			return;

		hfi_instance = &hfi_instances[pkg_id];
		info->hfi_instance = hfi_instance;
	}

	init_hfi_cpu_index(info);

	/*
	 * Now check if the HFI instance of the package of @cpu has been
	 * initialized (by checking its header). In such case, all we have to
	 * do is to add @cpu to this instance's cpumask and enable the instance
	 * if needed.
	 */
	mutex_lock(&hfi_instance_lock);
	if (hfi_instance->hdr)
		goto enable;

	/*
	 * Hardware is programmed with the physical address of the first page
	 * frame of the table. Hence, the allocated memory must be page-aligned.
	 *
	 * Some processors do not forget the initial address of the HFI table
	 * even after having been reprogrammed. Keep using the same pages. Do
	 * not free them.
	 */
	hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages << PAGE_SHIFT,
						   GFP_KERNEL | __GFP_ZERO);
	if (!hfi_instance->hw_table)
		goto unlock;

	/*
	 * Allocate memory to keep a local copy of the table that
	 * hardware generates.
	 */
	hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
					    GFP_KERNEL);
	if (!hfi_instance->local_table)
		goto free_hw_table;

	init_hfi_instance(hfi_instance);

	INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
	raw_spin_lock_init(&hfi_instance->table_lock);
	raw_spin_lock_init(&hfi_instance->event_lock);

enable:
	cpumask_set_cpu(cpu, hfi_instance->cpus);

	/*
	 * Enable this HFI instance if this is its first online CPU and
	 * there are user-space clients of thermal events.
	 */
	if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_clients_nr > 0) {
		hfi_set_hw_table(hfi_instance);
		hfi_enable();
	}

unlock:
	mutex_unlock(&hfi_instance_lock);
	return;

free_hw_table:
	free_pages_exact(hfi_instance->hw_table,
			 hfi_features.nr_table_pages << PAGE_SHIFT);
	goto unlock;
}

/**
 * intel_hfi_offline() - Disable HFI on @cpu
 * @cpu:	CPU in which the HFI will be disabled
 *
 * Remove @cpu from those covered by its HFI instance.
 *
 * On some processors, hardware remembers previous programming settings even
 * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
 * package of @cpu are offline. See note in intel_hfi_online().
 */
void intel_hfi_offline(unsigned int cpu)
{
	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
	struct hfi_instance *hfi_instance;

	/*
	 * Check if @cpu has an associated, initialized HFI instance (i.e., one
	 * with a non-NULL header). Also, HFI instances are only initialized if
	 * X86_FEATURE_HFI is present.
	 */
	hfi_instance = info->hfi_instance;
	if (!hfi_instance)
		return;

	if (!hfi_instance->hdr)
		return;

	mutex_lock(&hfi_instance_lock);
	cpumask_clear_cpu(cpu, hfi_instance->cpus);

	if (!cpumask_weight(hfi_instance->cpus))
		hfi_disable();

	mutex_unlock(&hfi_instance_lock);
}

static __init int hfi_parse_features(void)
{
	unsigned int nr_capabilities;
	union cpuid6_edx edx;

	if (!boot_cpu_has(X86_FEATURE_HFI))
		return -ENODEV;

	/*
	 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
	 * supported capabilities and the size of the HFI table.
	 */
	edx.full = cpuid_edx(CPUID_HFI_LEAF);

	if (!edx.split.capabilities.split.performance) {
		pr_debug("Performance reporting not supported! Not using HFI\n");
		return -ENODEV;
	}

	/*
	 * The number of supported capabilities determines the number of
	 * columns in the HFI table. Exclude the reserved bits.
	 */
	edx.split.capabilities.split.__reserved = 0;
	nr_capabilities = hweight8(edx.split.capabilities.bits);

	/* The number of 4KB pages required by the table */
	hfi_features.nr_table_pages = edx.split.table_pages + 1;

	/*
	 * The header contains change indications for each supported feature.
	 * The size of the table header is rounded up to be a multiple of 8
	 * bytes.
	 */
	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;

	/*
	 * Data of each logical processor is also rounded up to be a multiple
	 * of 8 bytes.
	 */
	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;

	return 0;
}
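
/*
 * Worked example: with the two capabilities defined above (performance and
 * energy efficiency), nr_capabilities = 2, so both hdr_size and cpu_stride
 * round up to 8 bytes. A one-page table then fits the 8-byte timestamp, the
 * 8-byte header and up to 510 CPU rows.
 */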

/*
 * If concurrency is not prevented by other means, the HFI enable/disable
 * routines must be called under hfi_instance_lock.
 */
static void hfi_enable_instance(void *ptr)
{
	hfi_set_hw_table(ptr);
	hfi_enable();
}

static void hfi_disable_instance(void *ptr)
{
	hfi_disable();
}

static void hfi_syscore_resume(void)
{
	/* This code runs only on the boot CPU. */
	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
	struct hfi_instance *hfi_instance = info->hfi_instance;

	/* No locking needed. There is no concurrency with CPU online. */
	if (hfi_clients_nr > 0)
		hfi_enable_instance(hfi_instance);
}

static int hfi_syscore_suspend(void)
{
	/* No locking needed. There is no concurrency with CPU offline. */
	hfi_disable();

	return 0;
}

static struct syscore_ops hfi_pm_ops = {
	.resume		= hfi_syscore_resume,
	.suspend	= hfi_syscore_suspend,
};
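
/*
 * hfi_clients_nr counts user-space listeners of the thermal netlink event
 * group. HFI is enabled only while at least one listener exists: the first
 * BIND enables every initialized instance, the last UNBIND disables them.
 * The HFI MSRs are programmed at package level, so the notifier below uses
 * smp_call_function_single() to run the enable/disable routine on one CPU
 * of each package.
 */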

static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
			      void *_notify)
{
	struct thermal_genl_notify *notify = _notify;
	struct hfi_instance *hfi_instance;
	smp_call_func_t func = NULL;
	unsigned int cpu;
	int i;

	if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
		return NOTIFY_DONE;

	if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
		return NOTIFY_DONE;

	mutex_lock(&hfi_instance_lock);

	switch (state) {
	case THERMAL_NOTIFY_BIND:
		if (++hfi_clients_nr == 1)
			func = hfi_enable_instance;
		break;
	case THERMAL_NOTIFY_UNBIND:
		if (--hfi_clients_nr == 0)
			func = hfi_disable_instance;
		break;
	}

	if (!func)
		goto out;

	for (i = 0; i < max_hfi_instances; i++) {
		hfi_instance = &hfi_instances[i];
		if (cpumask_empty(hfi_instance->cpus))
			continue;

		cpu = cpumask_any(hfi_instance->cpus);
		smp_call_function_single(cpu, func, hfi_instance, true);
	}

out:
	mutex_unlock(&hfi_instance_lock);

	return NOTIFY_OK;
}

static struct notifier_block hfi_thermal_nb = {
	.notifier_call = hfi_thermal_notify,
};

void __init intel_hfi_init(void)
{
	struct hfi_instance *hfi_instance;
	int i, j;

	if (hfi_parse_features())
		return;

	/*
	 * Note: HFI resources are managed at the physical package scope.
	 * There could be platforms that enumerate packages as Linux dies.
	 * Special handling would be needed if this happens on an HFI-capable
	 * platform.
	 */
	max_hfi_instances = topology_max_packages();

	/*
	 * This allocation may fail. CPU hotplug callbacks must check
	 * for a null pointer.
	 */
	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
				GFP_KERNEL);
	if (!hfi_instances)
		return;

	for (i = 0; i < max_hfi_instances; i++) {
		hfi_instance = &hfi_instances[i];
		if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
			goto err_nomem;
	}

	hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
	if (!hfi_updates_wq)
		goto err_nomem;

	/*
	 * Neither the thermal core nor Intel HFI can be built as a module.
	 * As built-in drivers, both are initialized before user space starts,
	 * hence we cannot miss BIND/UNBIND events when applications add or
	 * remove the thermal multicast group to/from a netlink socket.
	 */
	if (thermal_genl_register_notifier(&hfi_thermal_nb))
		goto err_nl_notif;

	register_syscore_ops(&hfi_pm_ops);

	return;

err_nl_notif:
	destroy_workqueue(hfi_updates_wq);

err_nomem:
	for (j = 0; j < i; ++j) {
		hfi_instance = &hfi_instances[j];
		free_cpumask_var(hfi_instance->cpus);
	}

	kfree(hfi_instances);
	hfi_instances = NULL;
}