// SPDX-License-Identifier: GPL-2.0
/*
 * UEFI Common Platform Error Record (CPER) support
 *
 * Copyright (C) 2010, Intel Corp.
 *	Author: Huang Ying <ying.huang@intel.com>
 *
 * CPER is the format used to describe platform hardware errors by
 * various tables, such as ERST, BERT and HEST etc.
 *
 * For more information about CPER, please refer to Appendix N of UEFI
 * Specification version 2.4.
 */
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 #include <linux/time.h>
18 #include <linux/cper.h>
19 #include <linux/dmi.h>
20 #include <linux/acpi.h>
21 #include <linux/pci.h>
22 #include <linux/aer.h>
23 #include <linux/printk.h>
24 #include <linux/bcd.h>
25 #include <acpi/ghes.h>
26 #include <ras/ras_event.h>
28 static char rcd_decode_str
[CPER_REC_LEN
];
31 * CPER record ID need to be unique even after reboot, because record
32 * ID is used as index for ERST storage, while CPER records from
33 * multiple boot may co-exist in ERST.
35 u64
cper_next_record_id(void)
37 static atomic64_t seq
;
39 if (!atomic64_read(&seq
)) {
40 time64_t time
= ktime_get_real_seconds();
43 * This code is unlikely to still be needed in year 2106,
44 * but just in case, let's use a few more bits for timestamps
45 * after y2038 to be sure they keep increasing monotonically
46 * for the next few hundred years...
48 if (time
< 0x80000000)
49 atomic64_set(&seq
, (ktime_get_real_seconds()) << 32);
51 atomic64_set(&seq
, 0x8000000000000000ull
|
52 ktime_get_real_seconds() << 24);
55 return atomic64_inc_return(&seq
);
57 EXPORT_SYMBOL_GPL(cper_next_record_id
);
/* Printable severity names, indexed by the CPER severity value. */
static const char * const severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};
66 const char *cper_severity_str(unsigned int severity
)
68 return severity
< ARRAY_SIZE(severity_strs
) ?
69 severity_strs
[severity
] : "unknown";
71 EXPORT_SYMBOL_GPL(cper_severity_str
);
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * NULL entries in @strs are skipped. If the output length would become
 * longer than 80, multiple lines are printed, with @pfx printed at the
 * beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * @bits has no positions beyond its own width; clamping keeps the
	 * shift below well-defined even for an oversized table.
	 */
	if (strs_size > 8 * sizeof(bits))
		strs_size = 8 * sizeof(bits);

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line if appending ", str" would pass 80. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	"IA32/X64",
	"IA64",
	"ARM",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	"IA32",
	"IA64",
	"X64",
	"ARM A32/T32",
	"ARM A64",
};

/* Processor error type names, indexed by bit position in proc_error_type. */
const char * const cper_proc_error_type_strs[] = {
	"cache error",
	"TLB error",
	"bus error",
	"micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
	"unknown or generic",
	"data read",
	"data write",
	"instruction execution",
};

/* Flag names, indexed by bit position in the flags field. */
static const char * const proc_flag_strs[] = {
	"restartable",
	"precise IP",
	"overflow",
	"corrected",
};
145 static void cper_print_proc_generic(const char *pfx
,
146 const struct cper_sec_proc_generic
*proc
)
148 if (proc
->validation_bits
& CPER_PROC_VALID_TYPE
)
149 printk("%s""processor_type: %d, %s\n", pfx
, proc
->proc_type
,
150 proc
->proc_type
< ARRAY_SIZE(proc_type_strs
) ?
151 proc_type_strs
[proc
->proc_type
] : "unknown");
152 if (proc
->validation_bits
& CPER_PROC_VALID_ISA
)
153 printk("%s""processor_isa: %d, %s\n", pfx
, proc
->proc_isa
,
154 proc
->proc_isa
< ARRAY_SIZE(proc_isa_strs
) ?
155 proc_isa_strs
[proc
->proc_isa
] : "unknown");
156 if (proc
->validation_bits
& CPER_PROC_VALID_ERROR_TYPE
) {
157 printk("%s""error_type: 0x%02x\n", pfx
, proc
->proc_error_type
);
158 cper_print_bits(pfx
, proc
->proc_error_type
,
159 cper_proc_error_type_strs
,
160 ARRAY_SIZE(cper_proc_error_type_strs
));
162 if (proc
->validation_bits
& CPER_PROC_VALID_OPERATION
)
163 printk("%s""operation: %d, %s\n", pfx
, proc
->operation
,
164 proc
->operation
< ARRAY_SIZE(proc_op_strs
) ?
165 proc_op_strs
[proc
->operation
] : "unknown");
166 if (proc
->validation_bits
& CPER_PROC_VALID_FLAGS
) {
167 printk("%s""flags: 0x%02x\n", pfx
, proc
->flags
);
168 cper_print_bits(pfx
, proc
->flags
, proc_flag_strs
,
169 ARRAY_SIZE(proc_flag_strs
));
171 if (proc
->validation_bits
& CPER_PROC_VALID_LEVEL
)
172 printk("%s""level: %d\n", pfx
, proc
->level
);
173 if (proc
->validation_bits
& CPER_PROC_VALID_VERSION
)
174 printk("%s""version_info: 0x%016llx\n", pfx
, proc
->cpu_version
);
175 if (proc
->validation_bits
& CPER_PROC_VALID_ID
)
176 printk("%s""processor_id: 0x%016llx\n", pfx
, proc
->proc_id
);
177 if (proc
->validation_bits
& CPER_PROC_VALID_TARGET_ADDRESS
)
178 printk("%s""target_address: 0x%016llx\n",
179 pfx
, proc
->target_addr
);
180 if (proc
->validation_bits
& CPER_PROC_VALID_REQUESTOR_ID
)
181 printk("%s""requestor_id: 0x%016llx\n",
182 pfx
, proc
->requestor_id
);
183 if (proc
->validation_bits
& CPER_PROC_VALID_RESPONDER_ID
)
184 printk("%s""responder_id: 0x%016llx\n",
185 pfx
, proc
->responder_id
);
186 if (proc
->validation_bits
& CPER_PROC_VALID_IP
)
187 printk("%s""IP: 0x%016llx\n", pfx
, proc
->ip
);
/* Memory error type names, indexed by the error type value. */
static const char * const mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};
209 const char *cper_mem_err_type_str(unsigned int etype
)
211 return etype
< ARRAY_SIZE(mem_err_type_strs
) ?
212 mem_err_type_strs
[etype
] : "unknown";
214 EXPORT_SYMBOL_GPL(cper_mem_err_type_str
);
216 static int cper_mem_err_location(struct cper_mem_err_compact
*mem
, char *msg
)
224 len
= CPER_REC_LEN
- 1;
225 if (mem
->validation_bits
& CPER_MEM_VALID_NODE
)
226 n
+= scnprintf(msg
+ n
, len
- n
, "node: %d ", mem
->node
);
227 if (mem
->validation_bits
& CPER_MEM_VALID_CARD
)
228 n
+= scnprintf(msg
+ n
, len
- n
, "card: %d ", mem
->card
);
229 if (mem
->validation_bits
& CPER_MEM_VALID_MODULE
)
230 n
+= scnprintf(msg
+ n
, len
- n
, "module: %d ", mem
->module
);
231 if (mem
->validation_bits
& CPER_MEM_VALID_RANK_NUMBER
)
232 n
+= scnprintf(msg
+ n
, len
- n
, "rank: %d ", mem
->rank
);
233 if (mem
->validation_bits
& CPER_MEM_VALID_BANK
)
234 n
+= scnprintf(msg
+ n
, len
- n
, "bank: %d ", mem
->bank
);
235 if (mem
->validation_bits
& CPER_MEM_VALID_DEVICE
)
236 n
+= scnprintf(msg
+ n
, len
- n
, "device: %d ", mem
->device
);
237 if (mem
->validation_bits
& CPER_MEM_VALID_ROW
)
238 n
+= scnprintf(msg
+ n
, len
- n
, "row: %d ", mem
->row
);
239 if (mem
->validation_bits
& CPER_MEM_VALID_COLUMN
)
240 n
+= scnprintf(msg
+ n
, len
- n
, "column: %d ", mem
->column
);
241 if (mem
->validation_bits
& CPER_MEM_VALID_BIT_POSITION
)
242 n
+= scnprintf(msg
+ n
, len
- n
, "bit_position: %d ",
244 if (mem
->validation_bits
& CPER_MEM_VALID_REQUESTOR_ID
)
245 n
+= scnprintf(msg
+ n
, len
- n
, "requestor_id: 0x%016llx ",
247 if (mem
->validation_bits
& CPER_MEM_VALID_RESPONDER_ID
)
248 n
+= scnprintf(msg
+ n
, len
- n
, "responder_id: 0x%016llx ",
250 if (mem
->validation_bits
& CPER_MEM_VALID_TARGET_ID
)
251 scnprintf(msg
+ n
, len
- n
, "target_id: 0x%016llx ",
258 static int cper_dimm_err_location(struct cper_mem_err_compact
*mem
, char *msg
)
261 const char *bank
= NULL
, *device
= NULL
;
263 if (!msg
|| !(mem
->validation_bits
& CPER_MEM_VALID_MODULE_HANDLE
))
267 len
= CPER_REC_LEN
- 1;
268 dmi_memdev_name(mem
->mem_dev_handle
, &bank
, &device
);
270 n
= snprintf(msg
, len
, "DIMM location: %s %s ", bank
, device
);
272 n
= snprintf(msg
, len
,
273 "DIMM location: not present. DMI handle: 0x%.4x ",
274 mem
->mem_dev_handle
);
280 void cper_mem_err_pack(const struct cper_sec_mem_err
*mem
,
281 struct cper_mem_err_compact
*cmem
)
283 cmem
->validation_bits
= mem
->validation_bits
;
284 cmem
->node
= mem
->node
;
285 cmem
->card
= mem
->card
;
286 cmem
->module
= mem
->module
;
287 cmem
->bank
= mem
->bank
;
288 cmem
->device
= mem
->device
;
289 cmem
->row
= mem
->row
;
290 cmem
->column
= mem
->column
;
291 cmem
->bit_pos
= mem
->bit_pos
;
292 cmem
->requestor_id
= mem
->requestor_id
;
293 cmem
->responder_id
= mem
->responder_id
;
294 cmem
->target_id
= mem
->target_id
;
295 cmem
->rank
= mem
->rank
;
296 cmem
->mem_array_handle
= mem
->mem_array_handle
;
297 cmem
->mem_dev_handle
= mem
->mem_dev_handle
;
300 const char *cper_mem_err_unpack(struct trace_seq
*p
,
301 struct cper_mem_err_compact
*cmem
)
303 const char *ret
= trace_seq_buffer_ptr(p
);
305 if (cper_mem_err_location(cmem
, rcd_decode_str
))
306 trace_seq_printf(p
, "%s", rcd_decode_str
);
307 if (cper_dimm_err_location(cmem
, rcd_decode_str
))
308 trace_seq_printf(p
, "%s", rcd_decode_str
);
309 trace_seq_putc(p
, '\0');
314 static void cper_print_mem(const char *pfx
, const struct cper_sec_mem_err
*mem
,
317 struct cper_mem_err_compact cmem
;
319 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
320 if (len
== sizeof(struct cper_sec_mem_err_old
) &&
321 (mem
->validation_bits
& ~(CPER_MEM_VALID_RANK_NUMBER
- 1))) {
322 pr_err(FW_WARN
"valid bits set for fields beyond structure\n");
325 if (mem
->validation_bits
& CPER_MEM_VALID_ERROR_STATUS
)
326 printk("%s""error_status: 0x%016llx\n", pfx
, mem
->error_status
);
327 if (mem
->validation_bits
& CPER_MEM_VALID_PA
)
328 printk("%s""physical_address: 0x%016llx\n",
329 pfx
, mem
->physical_addr
);
330 if (mem
->validation_bits
& CPER_MEM_VALID_PA_MASK
)
331 printk("%s""physical_address_mask: 0x%016llx\n",
332 pfx
, mem
->physical_addr_mask
);
333 cper_mem_err_pack(mem
, &cmem
);
334 if (cper_mem_err_location(&cmem
, rcd_decode_str
))
335 printk("%s%s\n", pfx
, rcd_decode_str
);
336 if (mem
->validation_bits
& CPER_MEM_VALID_ERROR_TYPE
) {
337 u8 etype
= mem
->error_type
;
338 printk("%s""error_type: %d, %s\n", pfx
, etype
,
339 cper_mem_err_type_str(etype
));
341 if (cper_dimm_err_location(&cmem
, rcd_decode_str
))
342 printk("%s%s\n", pfx
, rcd_decode_str
);
/* PCIe port type names, indexed by the port_type field. */
static const char * const pcie_port_type_strs[] = {
	"PCIe end point",
	"legacy PCI end point",
	"unknown",
	"unknown",
	"root port",
	"upstream switch port",
	"downstream switch port",
	"PCIe to PCI/PCI-X bridge",
	"PCI/PCI-X to PCIe bridge",
	"root complex integrated endpoint device",
	"root complex event collector",
};
359 static void cper_print_pcie(const char *pfx
, const struct cper_sec_pcie
*pcie
,
360 const struct acpi_hest_generic_data
*gdata
)
362 if (pcie
->validation_bits
& CPER_PCIE_VALID_PORT_TYPE
)
363 printk("%s""port_type: %d, %s\n", pfx
, pcie
->port_type
,
364 pcie
->port_type
< ARRAY_SIZE(pcie_port_type_strs
) ?
365 pcie_port_type_strs
[pcie
->port_type
] : "unknown");
366 if (pcie
->validation_bits
& CPER_PCIE_VALID_VERSION
)
367 printk("%s""version: %d.%d\n", pfx
,
368 pcie
->version
.major
, pcie
->version
.minor
);
369 if (pcie
->validation_bits
& CPER_PCIE_VALID_COMMAND_STATUS
)
370 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx
,
371 pcie
->command
, pcie
->status
);
372 if (pcie
->validation_bits
& CPER_PCIE_VALID_DEVICE_ID
) {
374 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx
,
375 pcie
->device_id
.segment
, pcie
->device_id
.bus
,
376 pcie
->device_id
.device
, pcie
->device_id
.function
);
377 printk("%s""slot: %d\n", pfx
,
378 pcie
->device_id
.slot
>> CPER_PCIE_SLOT_SHIFT
);
379 printk("%s""secondary_bus: 0x%02x\n", pfx
,
380 pcie
->device_id
.secondary_bus
);
381 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx
,
382 pcie
->device_id
.vendor_id
, pcie
->device_id
.device_id
);
383 p
= pcie
->device_id
.class_code
;
384 printk("%s""class_code: %02x%02x%02x\n", pfx
, p
[0], p
[1], p
[2]);
386 if (pcie
->validation_bits
& CPER_PCIE_VALID_SERIAL_NUMBER
)
387 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx
,
388 pcie
->serial_number
.lower
, pcie
->serial_number
.upper
);
389 if (pcie
->validation_bits
& CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS
)
391 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
392 pfx
, pcie
->bridge
.secondary_status
, pcie
->bridge
.control
);
395 static void cper_print_tstamp(const char *pfx
,
396 struct acpi_hest_generic_data_v300
*gdata
)
398 __u8 hour
, min
, sec
, day
, mon
, year
, century
, *timestamp
;
400 if (gdata
->validation_bits
& ACPI_HEST_GEN_VALID_TIMESTAMP
) {
401 timestamp
= (__u8
*)&(gdata
->time_stamp
);
402 sec
= bcd2bin(timestamp
[0]);
403 min
= bcd2bin(timestamp
[1]);
404 hour
= bcd2bin(timestamp
[2]);
405 day
= bcd2bin(timestamp
[4]);
406 mon
= bcd2bin(timestamp
[5]);
407 year
= bcd2bin(timestamp
[6]);
408 century
= bcd2bin(timestamp
[7]);
410 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx
,
411 (timestamp
[3] & 0x1 ? "precise " : "imprecise "),
412 century
, year
, mon
, day
, hour
, min
, sec
);
417 cper_estatus_print_section(const char *pfx
, struct acpi_hest_generic_data
*gdata
,
420 guid_t
*sec_type
= (guid_t
*)gdata
->section_type
;
424 if (acpi_hest_get_version(gdata
) >= 3)
425 cper_print_tstamp(pfx
, (struct acpi_hest_generic_data_v300
*)gdata
);
427 severity
= gdata
->error_severity
;
428 printk("%s""Error %d, type: %s\n", pfx
, sec_no
,
429 cper_severity_str(severity
));
430 if (gdata
->validation_bits
& CPER_SEC_VALID_FRU_ID
)
431 printk("%s""fru_id: %pUl\n", pfx
, gdata
->fru_id
);
432 if (gdata
->validation_bits
& CPER_SEC_VALID_FRU_TEXT
)
433 printk("%s""fru_text: %.20s\n", pfx
, gdata
->fru_text
);
435 snprintf(newpfx
, sizeof(newpfx
), "%s ", pfx
);
436 if (guid_equal(sec_type
, &CPER_SEC_PROC_GENERIC
)) {
437 struct cper_sec_proc_generic
*proc_err
= acpi_hest_get_payload(gdata
);
439 printk("%s""section_type: general processor error\n", newpfx
);
440 if (gdata
->error_data_length
>= sizeof(*proc_err
))
441 cper_print_proc_generic(newpfx
, proc_err
);
443 goto err_section_too_small
;
444 } else if (guid_equal(sec_type
, &CPER_SEC_PLATFORM_MEM
)) {
445 struct cper_sec_mem_err
*mem_err
= acpi_hest_get_payload(gdata
);
447 printk("%s""section_type: memory error\n", newpfx
);
448 if (gdata
->error_data_length
>=
449 sizeof(struct cper_sec_mem_err_old
))
450 cper_print_mem(newpfx
, mem_err
,
451 gdata
->error_data_length
);
453 goto err_section_too_small
;
454 } else if (guid_equal(sec_type
, &CPER_SEC_PCIE
)) {
455 struct cper_sec_pcie
*pcie
= acpi_hest_get_payload(gdata
);
457 printk("%s""section_type: PCIe error\n", newpfx
);
458 if (gdata
->error_data_length
>= sizeof(*pcie
))
459 cper_print_pcie(newpfx
, pcie
, gdata
);
461 goto err_section_too_small
;
462 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
463 } else if (guid_equal(sec_type
, &CPER_SEC_PROC_ARM
)) {
464 struct cper_sec_proc_arm
*arm_err
= acpi_hest_get_payload(gdata
);
466 printk("%ssection_type: ARM processor error\n", newpfx
);
467 if (gdata
->error_data_length
>= sizeof(*arm_err
))
468 cper_print_proc_arm(newpfx
, arm_err
);
470 goto err_section_too_small
;
472 #if defined(CONFIG_UEFI_CPER_X86)
473 } else if (guid_equal(sec_type
, &CPER_SEC_PROC_IA
)) {
474 struct cper_sec_proc_ia
*ia_err
= acpi_hest_get_payload(gdata
);
476 printk("%ssection_type: IA32/X64 processor error\n", newpfx
);
477 if (gdata
->error_data_length
>= sizeof(*ia_err
))
478 cper_print_proc_ia(newpfx
, ia_err
);
480 goto err_section_too_small
;
483 const void *err
= acpi_hest_get_payload(gdata
);
485 printk("%ssection type: unknown, %pUl\n", newpfx
, sec_type
);
486 printk("%ssection length: %#x\n", newpfx
,
487 gdata
->error_data_length
);
488 print_hex_dump(newpfx
, "", DUMP_PREFIX_OFFSET
, 16, 4, err
,
489 gdata
->error_data_length
, true);
494 err_section_too_small
:
495 pr_err(FW_WARN
"error section length is too small\n");
498 void cper_estatus_print(const char *pfx
,
499 const struct acpi_hest_generic_status
*estatus
)
501 struct acpi_hest_generic_data
*gdata
;
506 severity
= estatus
->error_severity
;
507 if (severity
== CPER_SEV_CORRECTED
)
508 printk("%s%s\n", pfx
,
509 "It has been corrected by h/w "
510 "and requires no further action");
511 printk("%s""event severity: %s\n", pfx
, cper_severity_str(severity
));
512 snprintf(newpfx
, sizeof(newpfx
), "%s ", pfx
);
514 apei_estatus_for_each_section(estatus
, gdata
) {
515 cper_estatus_print_section(newpfx
, gdata
, sec_no
);
519 EXPORT_SYMBOL_GPL(cper_estatus_print
);
521 int cper_estatus_check_header(const struct acpi_hest_generic_status
*estatus
)
523 if (estatus
->data_length
&&
524 estatus
->data_length
< sizeof(struct acpi_hest_generic_data
))
526 if (estatus
->raw_data_length
&&
527 estatus
->raw_data_offset
< sizeof(*estatus
) + estatus
->data_length
)
532 EXPORT_SYMBOL_GPL(cper_estatus_check_header
);
534 int cper_estatus_check(const struct acpi_hest_generic_status
*estatus
)
536 struct acpi_hest_generic_data
*gdata
;
537 unsigned int data_len
, record_size
;
540 rc
= cper_estatus_check_header(estatus
);
544 data_len
= estatus
->data_length
;
546 apei_estatus_for_each_section(estatus
, gdata
) {
547 if (sizeof(struct acpi_hest_generic_data
) > data_len
)
550 record_size
= acpi_hest_get_record_size(gdata
);
551 if (record_size
> data_len
)
554 data_len
-= record_size
;
561 EXPORT_SYMBOL_GPL(cper_estatus_check
);