Linux 4.19.133
[linux/fpc-iii.git] / drivers / firmware / efi / cper.c
blob116989cf3d4571e447291d9909743cdfa1850935
/*
 * UEFI Common Platform Error Record (CPER) support
 *
 * Copyright (C) 2010, Intel Corp.
 *	Author: Huang Ying <ying.huang@intel.com>
 *
 * CPER is the format used to describe platform hardware errors by
 * various tables, such as ERST, BERT and HEST etc.
 *
 * For more information about CPER, please refer to Appendix N of UEFI
 * Specification version 2.4.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
35 #include <linux/printk.h>
36 #include <linux/bcd.h>
37 #include <acpi/ghes.h>
38 #include <ras/ras_event.h>
40 static char rcd_decode_str[CPER_REC_LEN];
43 * CPER record ID need to be unique even after reboot, because record
44 * ID is used as index for ERST storage, while CPER records from
45 * multiple boot may co-exist in ERST.
47 u64 cper_next_record_id(void)
49 static atomic64_t seq;
51 if (!atomic64_read(&seq)) {
52 time64_t time = ktime_get_real_seconds();
55 * This code is unlikely to still be needed in year 2106,
56 * but just in case, let's use a few more bits for timestamps
57 * after y2038 to be sure they keep increasing monotonically
58 * for the next few hundred years...
60 if (time < 0x80000000)
61 atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
62 else
63 atomic64_set(&seq, 0x8000000000000000ull |
64 ktime_get_real_seconds() << 24);
67 return atomic64_inc_return(&seq);
69 EXPORT_SYMBOL_GPL(cper_next_record_id);
/* Human-readable names for CPER severities, indexed by severity value. */
static const char * const severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};

78 const char *cper_severity_str(unsigned int severity)
80 return severity < ARRAY_SIZE(severity_strs) ?
81 severity_strs[severity] : "unknown";
83 EXPORT_SYMBOL_GPL(cper_severity_str);
86 * cper_print_bits - print strings for set bits
87 * @pfx: prefix for each line, including log level and prefix string
88 * @bits: bit mask
89 * @strs: string array, indexed by bit position
90 * @strs_size: size of the string array: @strs
92 * For each set bit in @bits, print the corresponding string in @strs.
93 * If the output length is longer than 80, multiple line will be
94 * printed, with @pfx is printed at the beginning of each line.
96 void cper_print_bits(const char *pfx, unsigned int bits,
97 const char * const strs[], unsigned int strs_size)
99 int i, len = 0;
100 const char *str;
101 char buf[84];
103 for (i = 0; i < strs_size; i++) {
104 if (!(bits & (1U << i)))
105 continue;
106 str = strs[i];
107 if (!str)
108 continue;
109 if (len && len + strlen(str) + 2 > 80) {
110 printk("%s\n", buf);
111 len = 0;
113 if (!len)
114 len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
115 else
116 len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
118 if (len)
119 printk("%s\n", buf);
/* Processor type names, indexed by the proc_type field value. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
	[2] = "ARM",
};

/* Instruction set names, indexed by the proc_isa field value. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
	[3] = "ARM A32/T32",
	[4] = "ARM A64",
};

/*
 * Processor error type names, indexed by bit position; handed to
 * cper_print_bits() together with the proc_error_type mask.
 */
const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field value. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/*
 * Processor flag names, indexed by bit position; handed to
 * cper_print_bits() together with the flags mask.
 */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};

157 static void cper_print_proc_generic(const char *pfx,
158 const struct cper_sec_proc_generic *proc)
160 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
161 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
162 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
163 proc_type_strs[proc->proc_type] : "unknown");
164 if (proc->validation_bits & CPER_PROC_VALID_ISA)
165 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
166 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
167 proc_isa_strs[proc->proc_isa] : "unknown");
168 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
169 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
170 cper_print_bits(pfx, proc->proc_error_type,
171 cper_proc_error_type_strs,
172 ARRAY_SIZE(cper_proc_error_type_strs));
174 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
175 printk("%s""operation: %d, %s\n", pfx, proc->operation,
176 proc->operation < ARRAY_SIZE(proc_op_strs) ?
177 proc_op_strs[proc->operation] : "unknown");
178 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
179 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
180 cper_print_bits(pfx, proc->flags, proc_flag_strs,
181 ARRAY_SIZE(proc_flag_strs));
183 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
184 printk("%s""level: %d\n", pfx, proc->level);
185 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
186 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
187 if (proc->validation_bits & CPER_PROC_VALID_ID)
188 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
189 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
190 printk("%s""target_address: 0x%016llx\n",
191 pfx, proc->target_addr);
192 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
193 printk("%s""requestor_id: 0x%016llx\n",
194 pfx, proc->requestor_id);
195 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
196 printk("%s""responder_id: 0x%016llx\n",
197 pfx, proc->responder_id);
198 if (proc->validation_bits & CPER_PROC_VALID_IP)
199 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
/* Memory error type names, indexed by the error_type field value. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};

221 const char *cper_mem_err_type_str(unsigned int etype)
223 return etype < ARRAY_SIZE(mem_err_type_strs) ?
224 mem_err_type_strs[etype] : "unknown";
226 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
228 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
230 u32 len, n;
232 if (!msg)
233 return 0;
235 n = 0;
236 len = CPER_REC_LEN - 1;
237 if (mem->validation_bits & CPER_MEM_VALID_NODE)
238 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
239 if (mem->validation_bits & CPER_MEM_VALID_CARD)
240 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
241 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
242 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
243 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
244 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
245 if (mem->validation_bits & CPER_MEM_VALID_BANK)
246 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
247 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
248 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
249 if (mem->validation_bits & CPER_MEM_VALID_ROW)
250 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
251 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
252 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
253 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
254 n += scnprintf(msg + n, len - n, "bit_position: %d ",
255 mem->bit_pos);
256 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
257 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
258 mem->requestor_id);
259 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
260 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
261 mem->responder_id);
262 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
263 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
264 mem->target_id);
266 msg[n] = '\0';
267 return n;
270 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
272 u32 len, n;
273 const char *bank = NULL, *device = NULL;
275 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
276 return 0;
278 n = 0;
279 len = CPER_REC_LEN - 1;
280 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
281 if (bank && device)
282 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
283 else
284 n = snprintf(msg, len,
285 "DIMM location: not present. DMI handle: 0x%.4x ",
286 mem->mem_dev_handle);
288 msg[n] = '\0';
289 return n;
292 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
293 struct cper_mem_err_compact *cmem)
295 cmem->validation_bits = mem->validation_bits;
296 cmem->node = mem->node;
297 cmem->card = mem->card;
298 cmem->module = mem->module;
299 cmem->bank = mem->bank;
300 cmem->device = mem->device;
301 cmem->row = mem->row;
302 cmem->column = mem->column;
303 cmem->bit_pos = mem->bit_pos;
304 cmem->requestor_id = mem->requestor_id;
305 cmem->responder_id = mem->responder_id;
306 cmem->target_id = mem->target_id;
307 cmem->rank = mem->rank;
308 cmem->mem_array_handle = mem->mem_array_handle;
309 cmem->mem_dev_handle = mem->mem_dev_handle;
312 const char *cper_mem_err_unpack(struct trace_seq *p,
313 struct cper_mem_err_compact *cmem)
315 const char *ret = trace_seq_buffer_ptr(p);
317 if (cper_mem_err_location(cmem, rcd_decode_str))
318 trace_seq_printf(p, "%s", rcd_decode_str);
319 if (cper_dimm_err_location(cmem, rcd_decode_str))
320 trace_seq_printf(p, "%s", rcd_decode_str);
321 trace_seq_putc(p, '\0');
323 return ret;
326 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
327 int len)
329 struct cper_mem_err_compact cmem;
331 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
332 if (len == sizeof(struct cper_sec_mem_err_old) &&
333 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
334 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
335 return;
337 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
338 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
339 if (mem->validation_bits & CPER_MEM_VALID_PA)
340 printk("%s""physical_address: 0x%016llx\n",
341 pfx, mem->physical_addr);
342 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
343 printk("%s""physical_address_mask: 0x%016llx\n",
344 pfx, mem->physical_addr_mask);
345 cper_mem_err_pack(mem, &cmem);
346 if (cper_mem_err_location(&cmem, rcd_decode_str))
347 printk("%s%s\n", pfx, rcd_decode_str);
348 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
349 u8 etype = mem->error_type;
350 printk("%s""error_type: %d, %s\n", pfx, etype,
351 cper_mem_err_type_str(etype));
353 if (cper_dimm_err_location(&cmem, rcd_decode_str))
354 printk("%s%s\n", pfx, rcd_decode_str);
/* PCIe port type names, indexed by the port_type field value. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};

371 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
372 const struct acpi_hest_generic_data *gdata)
374 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
375 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
376 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
377 pcie_port_type_strs[pcie->port_type] : "unknown");
378 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
379 printk("%s""version: %d.%d\n", pfx,
380 pcie->version.major, pcie->version.minor);
381 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
382 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
383 pcie->command, pcie->status);
384 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
385 const __u8 *p;
386 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
387 pcie->device_id.segment, pcie->device_id.bus,
388 pcie->device_id.device, pcie->device_id.function);
389 printk("%s""slot: %d\n", pfx,
390 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
391 printk("%s""secondary_bus: 0x%02x\n", pfx,
392 pcie->device_id.secondary_bus);
393 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
394 pcie->device_id.vendor_id, pcie->device_id.device_id);
395 p = pcie->device_id.class_code;
396 printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
398 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
399 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
400 pcie->serial_number.lower, pcie->serial_number.upper);
401 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
402 printk(
403 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
404 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
406 /* Fatal errors call __ghes_panic() before AER handler prints this */
407 if ((pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) &&
408 (gdata->error_severity & CPER_SEV_FATAL)) {
409 struct aer_capability_regs *aer;
411 aer = (struct aer_capability_regs *)pcie->aer_info;
412 printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
413 pfx, aer->uncor_status, aer->uncor_mask);
414 printk("%saer_uncor_severity: 0x%08x\n",
415 pfx, aer->uncor_severity);
416 printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
417 aer->header_log.dw0, aer->header_log.dw1,
418 aer->header_log.dw2, aer->header_log.dw3);
422 static void cper_print_tstamp(const char *pfx,
423 struct acpi_hest_generic_data_v300 *gdata)
425 __u8 hour, min, sec, day, mon, year, century, *timestamp;
427 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
428 timestamp = (__u8 *)&(gdata->time_stamp);
429 sec = bcd2bin(timestamp[0]);
430 min = bcd2bin(timestamp[1]);
431 hour = bcd2bin(timestamp[2]);
432 day = bcd2bin(timestamp[4]);
433 mon = bcd2bin(timestamp[5]);
434 year = bcd2bin(timestamp[6]);
435 century = bcd2bin(timestamp[7]);
437 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
438 (timestamp[3] & 0x1 ? "precise " : "imprecise "),
439 century, year, mon, day, hour, min, sec);
443 static void
444 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
445 int sec_no)
447 guid_t *sec_type = (guid_t *)gdata->section_type;
448 __u16 severity;
449 char newpfx[64];
451 if (acpi_hest_get_version(gdata) >= 3)
452 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
454 severity = gdata->error_severity;
455 printk("%s""Error %d, type: %s\n", pfx, sec_no,
456 cper_severity_str(severity));
457 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
458 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
459 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
460 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
462 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
463 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
464 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
466 printk("%s""section_type: general processor error\n", newpfx);
467 if (gdata->error_data_length >= sizeof(*proc_err))
468 cper_print_proc_generic(newpfx, proc_err);
469 else
470 goto err_section_too_small;
471 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
472 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
474 printk("%s""section_type: memory error\n", newpfx);
475 if (gdata->error_data_length >=
476 sizeof(struct cper_sec_mem_err_old))
477 cper_print_mem(newpfx, mem_err,
478 gdata->error_data_length);
479 else
480 goto err_section_too_small;
481 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
482 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
484 printk("%s""section_type: PCIe error\n", newpfx);
485 if (gdata->error_data_length >= sizeof(*pcie))
486 cper_print_pcie(newpfx, pcie, gdata);
487 else
488 goto err_section_too_small;
489 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
490 } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
491 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
493 printk("%ssection_type: ARM processor error\n", newpfx);
494 if (gdata->error_data_length >= sizeof(*arm_err))
495 cper_print_proc_arm(newpfx, arm_err);
496 else
497 goto err_section_too_small;
498 #endif
499 #if defined(CONFIG_UEFI_CPER_X86)
500 } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
501 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
503 printk("%ssection_type: IA32/X64 processor error\n", newpfx);
504 if (gdata->error_data_length >= sizeof(*ia_err))
505 cper_print_proc_ia(newpfx, ia_err);
506 else
507 goto err_section_too_small;
508 #endif
509 } else {
510 const void *err = acpi_hest_get_payload(gdata);
512 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
513 printk("%ssection length: %#x\n", newpfx,
514 gdata->error_data_length);
515 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
516 gdata->error_data_length, true);
519 return;
521 err_section_too_small:
522 pr_err(FW_WARN "error section length is too small\n");
525 void cper_estatus_print(const char *pfx,
526 const struct acpi_hest_generic_status *estatus)
528 struct acpi_hest_generic_data *gdata;
529 int sec_no = 0;
530 char newpfx[64];
531 __u16 severity;
533 severity = estatus->error_severity;
534 if (severity == CPER_SEV_CORRECTED)
535 printk("%s%s\n", pfx,
536 "It has been corrected by h/w "
537 "and requires no further action");
538 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
539 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
541 apei_estatus_for_each_section(estatus, gdata) {
542 cper_estatus_print_section(newpfx, gdata, sec_no);
543 sec_no++;
546 EXPORT_SYMBOL_GPL(cper_estatus_print);
548 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
550 if (estatus->data_length &&
551 estatus->data_length < sizeof(struct acpi_hest_generic_data))
552 return -EINVAL;
553 if (estatus->raw_data_length &&
554 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
555 return -EINVAL;
557 return 0;
559 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
561 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
563 struct acpi_hest_generic_data *gdata;
564 unsigned int data_len, record_size;
565 int rc;
567 rc = cper_estatus_check_header(estatus);
568 if (rc)
569 return rc;
571 data_len = estatus->data_length;
573 apei_estatus_for_each_section(estatus, gdata) {
574 if (sizeof(struct acpi_hest_generic_data) > data_len)
575 return -EINVAL;
577 record_size = acpi_hest_get_record_size(gdata);
578 if (record_size > data_len)
579 return -EINVAL;
581 data_len -= record_size;
583 if (data_len)
584 return -EINVAL;
586 return 0;
588 EXPORT_SYMBOL_GPL(cper_estatus_check);