// SPDX-License-Identifier: GPL-2.0-only
/*
 * coretemp.c - Linux kernel module for hardware monitoring
 *
 * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
 *
 * Inspired by many hwmon drivers
 */
/* Prefix every pr_*() message emitted by this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 #include <linux/module.h>
13 #include <linux/init.h>
14 #include <linux/slab.h>
15 #include <linux/jiffies.h>
16 #include <linux/hwmon.h>
17 #include <linux/sysfs.h>
18 #include <linux/hwmon-sysfs.h>
19 #include <linux/err.h>
20 #include <linux/mutex.h>
21 #include <linux/list.h>
22 #include <linux/platform_device.h>
23 #include <linux/cpu.h>
24 #include <linux/smp.h>
25 #include <linux/moduleparam.h>
26 #include <linux/pci.h>
28 #include <asm/processor.h>
29 #include <asm/cpu_device_id.h>
/* Name given to the per-zone platform devices and to the hwmon device. */
#define DRVNAME	"coretemp"
34 * force_tjmax only matters when TjMax can't be read from the CPU itself.
35 * When set, it replaces the driver's suboptimal heuristic.
37 static int force_tjmax
;
38 module_param_named(tjmax
, force_tjmax
, int, 0444);
39 MODULE_PARM_DESC(tjmax
, "TjMax value in degrees Celsius");
/* Sysfs attribute number (tempN_*) reserved for the package sensor */
#define PKG_SYSFS_ATTR_NO	1
/* First sysfs attribute number used for per-core sensors */
#define BASE_SYSFS_ATTR_NO	2
/* Number of real cores supported per cpu */
#define NUM_REAL_CORES		128
/* Buffer length reserved for one attribute name */
#define CORETEMP_NAME_LENGTH	19
/* Number of attributes every sensor always gets */
#define MAX_CORE_ATTRS		4
/* Basic attributes plus one optional extra attribute */
#define TOTAL_ATTRS		(MAX_CORE_ATTRS + 1)
/* Size of the per-zone sensor array: one slot per sysfs attribute number */
#define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)

/* Core id of a logical cpu, and the sysfs attribute number derived from it */
#define TO_CORE_ID(cpu)		(cpu_data(cpu).cpu_core_id)
#define TO_ATTR_NO(cpu)		(TO_CORE_ID(cpu) + BASE_SYSFS_ATTR_NO)
/*
 * Iterate @i over the thread siblings of @cpu. The fallback variant is a
 * loop whose body never runs.
 *
 * NOTE(review): the preprocessor guard selecting between the two variants
 * is not visible in this copy of the file; CONFIG_SMP is assumed - confirm.
 */
#ifdef CONFIG_SMP
#define for_each_sibling(i, cpu) \
	for_each_cpu(i, topology_sibling_cpumask(cpu))
#else
#define for_each_sibling(i, cpu)	for (i = 0; false; )
#endif
60 * Per-Core Temperature Data
61 * @last_updated: The time when the current temperature value was updated
62 * earlier (in jiffies).
63 * @cpu_core_id: The CPU Core from which temperature values should be read
64 * This value is passed as "id" field to rdmsr/wrmsr functions.
65 * @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS,
66 * from where the temperature values should be read.
67 * @attr_size: Total number of pre-core attrs displayed in the sysfs.
68 * @is_pkg_data: If this is 1, the temp_data holds pkgtemp data.
69 * Otherwise, temp_data holds coretemp data.
70 * @valid: If this is 1, the current temperature is valid.
76 unsigned long last_updated
;
83 struct sensor_device_attribute sd_attrs
[TOTAL_ATTRS
];
84 char attr_name
[TOTAL_ATTRS
][CORETEMP_NAME_LENGTH
];
85 struct attribute
*attrs
[TOTAL_ATTRS
+ 1];
86 struct attribute_group attr_group
;
87 struct mutex update_lock
;
90 /* Platform Data per Physical CPU */
91 struct platform_data
{
92 struct device
*hwmon_dev
;
94 struct cpumask cpumask
;
95 struct temp_data
*core_data
[MAX_CORE_DATA
];
96 struct device_attribute name_attr
;
99 /* Keep track of how many zone pointers we allocated in init() */
100 static int max_zones __read_mostly
;
101 /* Array of zone pointers. Serialized by cpu hotplug lock */
102 static struct platform_device
**zone_devices
;
104 static ssize_t
show_label(struct device
*dev
,
105 struct device_attribute
*devattr
, char *buf
)
107 struct sensor_device_attribute
*attr
= to_sensor_dev_attr(devattr
);
108 struct platform_data
*pdata
= dev_get_drvdata(dev
);
109 struct temp_data
*tdata
= pdata
->core_data
[attr
->index
];
111 if (tdata
->is_pkg_data
)
112 return sprintf(buf
, "Package id %u\n", pdata
->pkg_id
);
114 return sprintf(buf
, "Core %u\n", tdata
->cpu_core_id
);
117 static ssize_t
show_crit_alarm(struct device
*dev
,
118 struct device_attribute
*devattr
, char *buf
)
121 struct sensor_device_attribute
*attr
= to_sensor_dev_attr(devattr
);
122 struct platform_data
*pdata
= dev_get_drvdata(dev
);
123 struct temp_data
*tdata
= pdata
->core_data
[attr
->index
];
125 mutex_lock(&tdata
->update_lock
);
126 rdmsr_on_cpu(tdata
->cpu
, tdata
->status_reg
, &eax
, &edx
);
127 mutex_unlock(&tdata
->update_lock
);
129 return sprintf(buf
, "%d\n", (eax
>> 5) & 1);
132 static ssize_t
show_tjmax(struct device
*dev
,
133 struct device_attribute
*devattr
, char *buf
)
135 struct sensor_device_attribute
*attr
= to_sensor_dev_attr(devattr
);
136 struct platform_data
*pdata
= dev_get_drvdata(dev
);
138 return sprintf(buf
, "%d\n", pdata
->core_data
[attr
->index
]->tjmax
);
141 static ssize_t
show_ttarget(struct device
*dev
,
142 struct device_attribute
*devattr
, char *buf
)
144 struct sensor_device_attribute
*attr
= to_sensor_dev_attr(devattr
);
145 struct platform_data
*pdata
= dev_get_drvdata(dev
);
147 return sprintf(buf
, "%d\n", pdata
->core_data
[attr
->index
]->ttarget
);
150 static ssize_t
show_temp(struct device
*dev
,
151 struct device_attribute
*devattr
, char *buf
)
154 struct sensor_device_attribute
*attr
= to_sensor_dev_attr(devattr
);
155 struct platform_data
*pdata
= dev_get_drvdata(dev
);
156 struct temp_data
*tdata
= pdata
->core_data
[attr
->index
];
158 mutex_lock(&tdata
->update_lock
);
160 /* Check whether the time interval has elapsed */
161 if (!tdata
->valid
|| time_after(jiffies
, tdata
->last_updated
+ HZ
)) {
162 rdmsr_on_cpu(tdata
->cpu
, tdata
->status_reg
, &eax
, &edx
);
164 * Ignore the valid bit. In all observed cases the register
165 * value is either low or zero if the valid bit is 0.
166 * Return it instead of reporting an error which doesn't
167 * really help at all.
169 tdata
->temp
= tdata
->tjmax
- ((eax
>> 16) & 0x7f) * 1000;
171 tdata
->last_updated
= jiffies
;
174 mutex_unlock(&tdata
->update_lock
);
175 return sprintf(buf
, "%d\n", tdata
->temp
);
183 static const struct tjmax_pci tjmax_pci_table
[] = {
184 { 0x0708, 110000 }, /* CE41x0 (Sodaville ) */
185 { 0x0c72, 102000 }, /* Atom S1240 (Centerton) */
186 { 0x0c73, 95000 }, /* Atom S1220 (Centerton) */
187 { 0x0c75, 95000 }, /* Atom S1260 (Centerton) */
195 static const struct tjmax tjmax_table
[] = {
196 { "CPU 230", 100000 }, /* Model 0x1c, stepping 2 */
197 { "CPU 330", 125000 }, /* Model 0x1c, stepping 2 */
208 static const struct tjmax_model tjmax_model_table
[] = {
209 { 0x1c, 10, 100000 }, /* D4xx, K4xx, N4xx, D5xx, K5xx, N5xx */
210 { 0x1c, ANY
, 90000 }, /* Z5xx, N2xx, possibly others
211 * Note: Also matches 230 and 330,
212 * which are covered by tjmax_table
214 { 0x26, ANY
, 90000 }, /* Atom Tunnel Creek (Exx), Lincroft (Z6xx)
215 * Note: TjMax for E6xxT is 110C, but CPU type
216 * is undetectable by software
218 { 0x27, ANY
, 90000 }, /* Atom Medfield (Z2460) */
219 { 0x35, ANY
, 90000 }, /* Atom Clover Trail/Cloverview (Z27x0) */
220 { 0x36, ANY
, 100000 }, /* Atom Cedar Trail/Cedarview (N2xxx, D2xxx)
221 * Also matches S12x0 (stepping 9), covered by
226 static int adjust_tjmax(struct cpuinfo_x86
*c
, u32 id
, struct device
*dev
)
228 /* The 100C is default for both mobile and non mobile CPUs */
231 int tjmax_ee
= 85000;
236 u16 devfn
= PCI_DEVFN(0, 0);
237 struct pci_dev
*host_bridge
= pci_get_domain_bus_and_slot(0, 0, devfn
);
240 * Explicit tjmax table entries override heuristics.
241 * First try PCI host bridge IDs, followed by model ID strings
242 * and model/stepping information.
244 if (host_bridge
&& host_bridge
->vendor
== PCI_VENDOR_ID_INTEL
) {
245 for (i
= 0; i
< ARRAY_SIZE(tjmax_pci_table
); i
++) {
246 if (host_bridge
->device
== tjmax_pci_table
[i
].device
)
247 return tjmax_pci_table
[i
].tjmax
;
251 for (i
= 0; i
< ARRAY_SIZE(tjmax_table
); i
++) {
252 if (strstr(c
->x86_model_id
, tjmax_table
[i
].id
))
253 return tjmax_table
[i
].tjmax
;
256 for (i
= 0; i
< ARRAY_SIZE(tjmax_model_table
); i
++) {
257 const struct tjmax_model
*tm
= &tjmax_model_table
[i
];
258 if (c
->x86_model
== tm
->model
&&
259 (tm
->mask
== ANY
|| c
->x86_stepping
== tm
->mask
))
263 /* Early chips have no MSR for TjMax */
265 if (c
->x86_model
== 0xf && c
->x86_stepping
< 4)
268 if (c
->x86_model
> 0xe && usemsr_ee
) {
272 * Now we can detect the mobile CPU using Intel provided table
273 * http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
274 * For Core2 cores, check MSR 0x17, bit 28 1 = Mobile CPU
276 err
= rdmsr_safe_on_cpu(id
, 0x17, &eax
, &edx
);
279 "Unable to access MSR 0x17, assuming desktop"
282 } else if (c
->x86_model
< 0x17 && !(eax
& 0x10000000)) {
284 * Trust bit 28 up to Penryn, I could not find any
285 * documentation on that; if you happen to know
286 * someone at Intel please ask
290 /* Platform ID bits 52:50 (EDX starts at bit 32) */
291 platform_id
= (edx
>> 18) & 0x7;
294 * Mobile Penryn CPU seems to be platform ID 7 or 5
297 if (c
->x86_model
== 0x17 &&
298 (platform_id
== 5 || platform_id
== 7)) {
300 * If MSR EE bit is set, set it to 90 degrees C,
301 * otherwise 105 degrees C
310 err
= rdmsr_safe_on_cpu(id
, 0xee, &eax
, &edx
);
313 "Unable to access MSR 0xEE, for Tjmax, left"
315 } else if (eax
& 0x40000000) {
318 } else if (tjmax
== 100000) {
320 * If we don't use msr EE it means we are desktop CPU
321 * (with exeception of Atom)
323 dev_warn(dev
, "Using relative temperature scale!\n");
329 static bool cpu_has_tjmax(struct cpuinfo_x86
*c
)
331 u8 model
= c
->x86_model
;
333 return model
> 0xe &&
341 static int get_tjmax(struct cpuinfo_x86
*c
, u32 id
, struct device
*dev
)
348 * A new feature of current Intel(R) processors, the
349 * IA32_TEMPERATURE_TARGET contains the TjMax value
351 err
= rdmsr_safe_on_cpu(id
, MSR_IA32_TEMPERATURE_TARGET
, &eax
, &edx
);
353 if (cpu_has_tjmax(c
))
354 dev_warn(dev
, "Unable to read TjMax from CPU %u\n", id
);
356 val
= (eax
>> 16) & 0xff;
358 * If the TjMax is not plausible, an assumption
362 dev_dbg(dev
, "TjMax is %d degrees C\n", val
);
368 dev_notice(dev
, "TjMax forced to %d degrees C by user\n",
370 return force_tjmax
* 1000;
374 * An assumption is made for early CPUs and unreadable MSR.
375 * NOTE: the calculated value may not be correct.
377 return adjust_tjmax(c
, id
, dev
);
380 static int create_core_attrs(struct temp_data
*tdata
, struct device
*dev
,
384 static ssize_t (*const rd_ptr
[TOTAL_ATTRS
]) (struct device
*dev
,
385 struct device_attribute
*devattr
, char *buf
) = {
386 show_label
, show_crit_alarm
, show_temp
, show_tjmax
,
388 static const char *const suffixes
[TOTAL_ATTRS
] = {
389 "label", "crit_alarm", "input", "crit", "max"
392 for (i
= 0; i
< tdata
->attr_size
; i
++) {
393 snprintf(tdata
->attr_name
[i
], CORETEMP_NAME_LENGTH
,
394 "temp%d_%s", attr_no
, suffixes
[i
]);
395 sysfs_attr_init(&tdata
->sd_attrs
[i
].dev_attr
.attr
);
396 tdata
->sd_attrs
[i
].dev_attr
.attr
.name
= tdata
->attr_name
[i
];
397 tdata
->sd_attrs
[i
].dev_attr
.attr
.mode
= 0444;
398 tdata
->sd_attrs
[i
].dev_attr
.show
= rd_ptr
[i
];
399 tdata
->sd_attrs
[i
].index
= attr_no
;
400 tdata
->attrs
[i
] = &tdata
->sd_attrs
[i
].dev_attr
.attr
;
402 tdata
->attr_group
.attrs
= tdata
->attrs
;
403 return sysfs_create_group(&dev
->kobj
, &tdata
->attr_group
);
407 static int chk_ucode_version(unsigned int cpu
)
409 struct cpuinfo_x86
*c
= &cpu_data(cpu
);
412 * Check if we have problem with errata AE18 of Core processors:
413 * Readings might stop update when processor visited too deep sleep,
414 * fixed for stepping D0 (6EC).
416 if (c
->x86_model
== 0xe && c
->x86_stepping
< 0xc && c
->microcode
< 0x39) {
417 pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n");
423 static struct platform_device
*coretemp_get_pdev(unsigned int cpu
)
425 int id
= topology_logical_die_id(cpu
);
427 if (id
>= 0 && id
< max_zones
)
428 return zone_devices
[id
];
432 static struct temp_data
*init_temp_data(unsigned int cpu
, int pkg_flag
)
434 struct temp_data
*tdata
;
436 tdata
= kzalloc(sizeof(struct temp_data
), GFP_KERNEL
);
440 tdata
->status_reg
= pkg_flag
? MSR_IA32_PACKAGE_THERM_STATUS
:
441 MSR_IA32_THERM_STATUS
;
442 tdata
->is_pkg_data
= pkg_flag
;
444 tdata
->cpu_core_id
= TO_CORE_ID(cpu
);
445 tdata
->attr_size
= MAX_CORE_ATTRS
;
446 mutex_init(&tdata
->update_lock
);
450 static int create_core_data(struct platform_device
*pdev
, unsigned int cpu
,
453 struct temp_data
*tdata
;
454 struct platform_data
*pdata
= platform_get_drvdata(pdev
);
455 struct cpuinfo_x86
*c
= &cpu_data(cpu
);
460 * Find attr number for sysfs:
461 * We map the attr number to core id of the CPU
462 * The attr number is always core id + 2
463 * The Pkgtemp will always show up as temp1_*, if available
465 attr_no
= pkg_flag
? PKG_SYSFS_ATTR_NO
: TO_ATTR_NO(cpu
);
467 if (attr_no
> MAX_CORE_DATA
- 1)
470 tdata
= init_temp_data(cpu
, pkg_flag
);
474 /* Test if we can access the status register */
475 err
= rdmsr_safe_on_cpu(cpu
, tdata
->status_reg
, &eax
, &edx
);
479 /* We can access status register. Get Critical Temperature */
480 tdata
->tjmax
= get_tjmax(c
, cpu
, &pdev
->dev
);
483 * Read the still undocumented bits 8:15 of IA32_TEMPERATURE_TARGET.
484 * The target temperature is available on older CPUs but not in this
485 * register. Atoms don't have the register at all.
487 if (c
->x86_model
> 0xe && c
->x86_model
!= 0x1c) {
488 err
= rdmsr_safe_on_cpu(cpu
, MSR_IA32_TEMPERATURE_TARGET
,
492 = tdata
->tjmax
- ((eax
>> 8) & 0xff) * 1000;
497 pdata
->core_data
[attr_no
] = tdata
;
499 /* Create sysfs interfaces */
500 err
= create_core_attrs(tdata
, pdata
->hwmon_dev
, attr_no
);
506 pdata
->core_data
[attr_no
] = NULL
;
512 coretemp_add_core(struct platform_device
*pdev
, unsigned int cpu
, int pkg_flag
)
514 if (create_core_data(pdev
, cpu
, pkg_flag
))
515 dev_err(&pdev
->dev
, "Adding Core %u failed\n", cpu
);
518 static void coretemp_remove_core(struct platform_data
*pdata
, int indx
)
520 struct temp_data
*tdata
= pdata
->core_data
[indx
];
522 /* Remove the sysfs attributes */
523 sysfs_remove_group(&pdata
->hwmon_dev
->kobj
, &tdata
->attr_group
);
525 kfree(pdata
->core_data
[indx
]);
526 pdata
->core_data
[indx
] = NULL
;
529 static int coretemp_probe(struct platform_device
*pdev
)
531 struct device
*dev
= &pdev
->dev
;
532 struct platform_data
*pdata
;
534 /* Initialize the per-zone data structures */
535 pdata
= devm_kzalloc(dev
, sizeof(struct platform_data
), GFP_KERNEL
);
539 pdata
->pkg_id
= pdev
->id
;
540 platform_set_drvdata(pdev
, pdata
);
542 pdata
->hwmon_dev
= devm_hwmon_device_register_with_groups(dev
, DRVNAME
,
544 return PTR_ERR_OR_ZERO(pdata
->hwmon_dev
);
547 static int coretemp_remove(struct platform_device
*pdev
)
549 struct platform_data
*pdata
= platform_get_drvdata(pdev
);
552 for (i
= MAX_CORE_DATA
- 1; i
>= 0; --i
)
553 if (pdata
->core_data
[i
])
554 coretemp_remove_core(pdata
, i
);
559 static struct platform_driver coretemp_driver
= {
563 .probe
= coretemp_probe
,
564 .remove
= coretemp_remove
,
567 static struct platform_device
*coretemp_device_add(unsigned int cpu
)
569 int err
, zoneid
= topology_logical_die_id(cpu
);
570 struct platform_device
*pdev
;
573 return ERR_PTR(-ENOMEM
);
575 pdev
= platform_device_alloc(DRVNAME
, zoneid
);
577 return ERR_PTR(-ENOMEM
);
579 err
= platform_device_add(pdev
);
581 platform_device_put(pdev
);
585 zone_devices
[zoneid
] = pdev
;
589 static int coretemp_cpu_online(unsigned int cpu
)
591 struct platform_device
*pdev
= coretemp_get_pdev(cpu
);
592 struct cpuinfo_x86
*c
= &cpu_data(cpu
);
593 struct platform_data
*pdata
;
596 * Don't execute this on resume as the offline callback did
597 * not get executed on suspend.
599 if (cpuhp_tasks_frozen
)
603 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
604 * sensors. We check this bit only, all the early CPUs
605 * without thermal sensors will be filtered out.
607 if (!cpu_has(c
, X86_FEATURE_DTHERM
))
611 /* Check the microcode version of the CPU */
612 if (chk_ucode_version(cpu
))
616 * Alright, we have DTS support.
617 * We are bringing the _first_ core in this pkg
618 * online. So, initialize per-pkg data structures and
619 * then bring this core online.
621 pdev
= coretemp_device_add(cpu
);
623 return PTR_ERR(pdev
);
626 * Check whether pkgtemp support is available.
627 * If so, add interfaces for pkgtemp.
629 if (cpu_has(c
, X86_FEATURE_PTS
))
630 coretemp_add_core(pdev
, cpu
, 1);
633 pdata
= platform_get_drvdata(pdev
);
635 * Check whether a thread sibling is already online. If not add the
636 * interface for this CPU core.
638 if (!cpumask_intersects(&pdata
->cpumask
, topology_sibling_cpumask(cpu
)))
639 coretemp_add_core(pdev
, cpu
, 0);
641 cpumask_set_cpu(cpu
, &pdata
->cpumask
);
645 static int coretemp_cpu_offline(unsigned int cpu
)
647 struct platform_device
*pdev
= coretemp_get_pdev(cpu
);
648 struct platform_data
*pd
;
649 struct temp_data
*tdata
;
653 * Don't execute this on suspend as the device remove locks
656 if (cpuhp_tasks_frozen
)
659 /* If the physical CPU device does not exist, just return */
663 /* The core id is too big, just return */
664 indx
= TO_ATTR_NO(cpu
);
665 if (indx
> MAX_CORE_DATA
- 1)
668 pd
= platform_get_drvdata(pdev
);
669 tdata
= pd
->core_data
[indx
];
671 cpumask_clear_cpu(cpu
, &pd
->cpumask
);
674 * If this is the last thread sibling, remove the CPU core
675 * interface, If there is still a sibling online, transfer the
676 * target cpu of that core interface to it.
678 target
= cpumask_any_and(&pd
->cpumask
, topology_sibling_cpumask(cpu
));
679 if (target
>= nr_cpu_ids
) {
680 coretemp_remove_core(pd
, indx
);
681 } else if (tdata
&& tdata
->cpu
== cpu
) {
682 mutex_lock(&tdata
->update_lock
);
684 mutex_unlock(&tdata
->update_lock
);
688 * If all cores in this pkg are offline, remove the device. This
689 * will invoke the platform driver remove function, which cleans up
692 if (cpumask_empty(&pd
->cpumask
)) {
693 zone_devices
[topology_logical_die_id(cpu
)] = NULL
;
694 platform_device_unregister(pdev
);
699 * Check whether this core is the target for the package
700 * interface. We need to assign it to some other cpu.
702 tdata
= pd
->core_data
[PKG_SYSFS_ATTR_NO
];
703 if (tdata
&& tdata
->cpu
== cpu
) {
704 target
= cpumask_first(&pd
->cpumask
);
705 mutex_lock(&tdata
->update_lock
);
707 mutex_unlock(&tdata
->update_lock
);
711 static const struct x86_cpu_id __initconst coretemp_ids
[] = {
712 { X86_VENDOR_INTEL
, X86_FAMILY_ANY
, X86_MODEL_ANY
, X86_FEATURE_DTHERM
},
715 MODULE_DEVICE_TABLE(x86cpu
, coretemp_ids
);
717 static enum cpuhp_state coretemp_hp_online
;
719 static int __init
coretemp_init(void)
724 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
725 * sensors. We check this bit only, all the early CPUs
726 * without thermal sensors will be filtered out.
728 if (!x86_match_cpu(coretemp_ids
))
731 max_zones
= topology_max_packages() * topology_max_die_per_package();
732 zone_devices
= kcalloc(max_zones
, sizeof(struct platform_device
*),
737 err
= platform_driver_register(&coretemp_driver
);
741 err
= cpuhp_setup_state(CPUHP_AP_ONLINE_DYN
, "hwmon/coretemp:online",
742 coretemp_cpu_online
, coretemp_cpu_offline
);
745 coretemp_hp_online
= err
;
749 platform_driver_unregister(&coretemp_driver
);
754 module_init(coretemp_init
)
756 static void __exit
coretemp_exit(void)
758 cpuhp_remove_state(coretemp_hp_online
);
759 platform_driver_unregister(&coretemp_driver
);
762 module_exit(coretemp_exit
)
764 MODULE_AUTHOR("Rudolf Marek <r.marek@assembler.cz>");
765 MODULE_DESCRIPTION("Intel Core temperature monitor");
766 MODULE_LICENSE("GPL");