1 // SPDX-License-Identifier: GPL-2.0
4 * Shared code by both skx_edac and i10nm_edac. Originally split out
5 * from the skx_edac driver.
7 * This file is linked into both skx_edac and i10nm_edac drivers. In
8 * order to avoid link errors, this file must be like a pure library
9 * without including symbols and defines which would otherwise conflict,
10 * when linked once into a module and into a built-in object, at the
11 * same time. For example, __this_module symbol references when that
12 * file is being linked into a built-in object.
14 * Copyright (c) 2018, Intel Corporation.
17 #include <linux/acpi.h>
18 #include <linux/dmi.h>
19 #include <linux/adxl.h>
20 #include <acpi/nfit.h>
22 #include "edac_module.h"
23 #include "skx_common.h"
25 static const char * const component_names
[] = {
26 [INDEX_SOCKET
] = "ProcessorSocketId",
27 [INDEX_MEMCTRL
] = "MemoryControllerId",
28 [INDEX_CHANNEL
] = "ChannelId",
29 [INDEX_DIMM
] = "DimmSlotId",
32 static int component_indices
[ARRAY_SIZE(component_names
)];
33 static int adxl_component_count
;
34 static const char * const *adxl_component_names
;
35 static u64
*adxl_values
;
36 static char *adxl_msg
;
38 static char skx_msg
[MSG_SIZE
];
39 static skx_decode_f skx_decode
;
40 static skx_show_retry_log_f skx_show_retry_rd_err_log
;
41 static u64 skx_tolm
, skx_tohm
;
42 static LIST_HEAD(dev_edac_list
);
44 int __init
skx_adxl_get(void)
46 const char * const *names
;
49 names
= adxl_get_component_names();
51 skx_printk(KERN_NOTICE
, "No firmware support for address translation.\n");
55 for (i
= 0; i
< INDEX_MAX
; i
++) {
56 for (j
= 0; names
[j
]; j
++) {
57 if (!strcmp(component_names
[i
], names
[j
])) {
58 component_indices
[i
] = j
;
67 adxl_component_names
= names
;
69 adxl_component_count
++;
71 adxl_values
= kcalloc(adxl_component_count
, sizeof(*adxl_values
),
74 adxl_component_count
= 0;
78 adxl_msg
= kzalloc(MSG_SIZE
, GFP_KERNEL
);
80 adxl_component_count
= 0;
87 skx_printk(KERN_ERR
, "'%s' is not matched from DSM parameters: ",
89 for (j
= 0; names
[j
]; j
++)
90 skx_printk(KERN_CONT
, "%s ", names
[j
]);
91 skx_printk(KERN_CONT
, "\n");
96 void __exit
skx_adxl_put(void)
102 static bool skx_adxl_decode(struct decoded_addr
*res
)
107 if (res
->addr
>= skx_tohm
|| (res
->addr
>= skx_tolm
&&
108 res
->addr
< BIT_ULL(32))) {
109 edac_dbg(0, "Address 0x%llx out of range\n", res
->addr
);
113 if (adxl_decode(res
->addr
, adxl_values
)) {
114 edac_dbg(0, "Failed to decode 0x%llx\n", res
->addr
);
118 res
->socket
= (int)adxl_values
[component_indices
[INDEX_SOCKET
]];
119 res
->imc
= (int)adxl_values
[component_indices
[INDEX_MEMCTRL
]];
120 res
->channel
= (int)adxl_values
[component_indices
[INDEX_CHANNEL
]];
121 res
->dimm
= (int)adxl_values
[component_indices
[INDEX_DIMM
]];
123 if (res
->imc
> NUM_IMC
- 1) {
124 skx_printk(KERN_ERR
, "Bad imc %d\n", res
->imc
);
128 list_for_each_entry(d
, &dev_edac_list
, list
) {
129 if (d
->imc
[0].src_id
== res
->socket
) {
136 skx_printk(KERN_ERR
, "No device for src_id %d imc %d\n",
137 res
->socket
, res
->imc
);
141 for (i
= 0; i
< adxl_component_count
; i
++) {
142 if (adxl_values
[i
] == ~0x0ull
)
145 len
+= snprintf(adxl_msg
+ len
, MSG_SIZE
- len
, " %s:0x%llx",
146 adxl_component_names
[i
], adxl_values
[i
]);
147 if (MSG_SIZE
- len
<= 0)
154 void skx_set_decode(skx_decode_f decode
, skx_show_retry_log_f show_retry_log
)
157 skx_show_retry_rd_err_log
= show_retry_log
;
160 int skx_get_src_id(struct skx_dev
*d
, int off
, u8
*id
)
164 if (pci_read_config_dword(d
->util_all
, off
, ®
)) {
165 skx_printk(KERN_ERR
, "Failed to read src id\n");
169 *id
= GET_BITFIELD(reg
, 12, 14);
173 int skx_get_node_id(struct skx_dev
*d
, u8
*id
)
177 if (pci_read_config_dword(d
->util_all
, 0xf4, ®
)) {
178 skx_printk(KERN_ERR
, "Failed to read node id\n");
182 *id
= GET_BITFIELD(reg
, 0, 2);
186 static int get_width(u32 mtr
)
188 switch (GET_BITFIELD(mtr
, 8, 9)) {
200 * We use the per-socket device @cfg->did to count how many sockets are present,
201 * and to detemine which PCI buses are associated with each socket. Allocate
202 * and build the full list of all the skx_dev structures that we need here.
204 int skx_get_all_bus_mappings(struct res_config
*cfg
, struct list_head
**list
)
206 struct pci_dev
*pdev
, *prev
;
213 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, cfg
->decs_did
, prev
);
217 d
= kzalloc(sizeof(*d
), GFP_KERNEL
);
223 if (pci_read_config_dword(pdev
, cfg
->busno_cfg_offset
, ®
)) {
226 skx_printk(KERN_ERR
, "Failed to read bus idx\n");
230 d
->bus
[0] = GET_BITFIELD(reg
, 0, 7);
231 d
->bus
[1] = GET_BITFIELD(reg
, 8, 15);
232 if (cfg
->type
== SKX
) {
233 d
->seg
= pci_domain_nr(pdev
->bus
);
234 d
->bus
[2] = GET_BITFIELD(reg
, 16, 23);
235 d
->bus
[3] = GET_BITFIELD(reg
, 24, 31);
237 d
->seg
= GET_BITFIELD(reg
, 16, 23);
240 edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
241 d
->bus
[0], d
->bus
[1], d
->bus
[2], d
->bus
[3]);
242 list_add_tail(&d
->list
, &dev_edac_list
);
247 *list
= &dev_edac_list
;
251 int skx_get_hi_lo(unsigned int did
, int off
[], u64
*tolm
, u64
*tohm
)
253 struct pci_dev
*pdev
;
256 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, did
, NULL
);
258 edac_dbg(2, "Can't get tolm/tohm\n");
262 if (pci_read_config_dword(pdev
, off
[0], ®
)) {
263 skx_printk(KERN_ERR
, "Failed to read tolm\n");
268 if (pci_read_config_dword(pdev
, off
[1], ®
)) {
269 skx_printk(KERN_ERR
, "Failed to read lower tohm\n");
274 if (pci_read_config_dword(pdev
, off
[2], ®
)) {
275 skx_printk(KERN_ERR
, "Failed to read upper tohm\n");
278 skx_tohm
|= (u64
)reg
<< 32;
283 edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm
, skx_tohm
);
290 static int skx_get_dimm_attr(u32 reg
, int lobit
, int hibit
, int add
,
291 int minval
, int maxval
, const char *name
)
293 u32 val
= GET_BITFIELD(reg
, lobit
, hibit
);
295 if (val
< minval
|| val
> maxval
) {
296 edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name
, val
, reg
);
302 #define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
303 #define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
304 #define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
306 int skx_get_dimm_info(u32 mtr
, u32 mcmtr
, u32 amap
, struct dimm_info
*dimm
,
307 struct skx_imc
*imc
, int chan
, int dimmno
)
309 int banks
= 16, ranks
, rows
, cols
, npages
;
312 ranks
= numrank(mtr
);
317 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
319 size
= ((1ull << (rows
+ cols
+ ranks
)) * banks
) >> (20 - 3);
320 npages
= MiB_TO_PAGES(size
);
322 edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
323 imc
->mc
, chan
, dimmno
, size
, npages
,
324 banks
, 1 << ranks
, rows
, cols
);
326 imc
->chan
[chan
].dimms
[dimmno
].close_pg
= GET_BITFIELD(mcmtr
, 0, 0);
327 imc
->chan
[chan
].dimms
[dimmno
].bank_xor_enable
= GET_BITFIELD(mcmtr
, 9, 9);
328 imc
->chan
[chan
].dimms
[dimmno
].fine_grain_bank
= GET_BITFIELD(amap
, 0, 0);
329 imc
->chan
[chan
].dimms
[dimmno
].rowbits
= rows
;
330 imc
->chan
[chan
].dimms
[dimmno
].colbits
= cols
;
332 dimm
->nr_pages
= npages
;
334 dimm
->dtype
= get_width(mtr
);
335 dimm
->mtype
= MEM_DDR4
;
336 dimm
->edac_mode
= EDAC_SECDED
; /* likely better than this */
337 snprintf(dimm
->label
, sizeof(dimm
->label
), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
338 imc
->src_id
, imc
->lmc
, chan
, dimmno
);
343 int skx_get_nvdimm_info(struct dimm_info
*dimm
, struct skx_imc
*imc
,
344 int chan
, int dimmno
, const char *mod_str
)
351 dev_handle
= ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno
, chan
, imc
->lmc
,
354 smbios_handle
= nfit_get_smbios_id(dev_handle
, &flags
);
355 if (smbios_handle
== -EOPNOTSUPP
) {
356 pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str
);
360 if (smbios_handle
< 0) {
361 skx_printk(KERN_ERR
, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle
);
365 if (flags
& ACPI_NFIT_MEM_MAP_FAILED
) {
366 skx_printk(KERN_ERR
, "NVDIMM ADR=0x%x is not mapped\n", dev_handle
);
370 size
= dmi_memdev_size(smbios_handle
);
372 skx_printk(KERN_ERR
, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
373 dev_handle
, smbios_handle
);
376 dimm
->nr_pages
= size
>> PAGE_SHIFT
;
378 dimm
->dtype
= DEV_UNKNOWN
;
379 dimm
->mtype
= MEM_NVDIMM
;
380 dimm
->edac_mode
= EDAC_SECDED
; /* likely better than this */
382 edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
383 imc
->mc
, chan
, dimmno
, size
>> 20, dimm
->nr_pages
);
385 snprintf(dimm
->label
, sizeof(dimm
->label
), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
386 imc
->src_id
, imc
->lmc
, chan
, dimmno
);
388 return (size
== 0 || size
== ~0ull) ? 0 : 1;
391 int skx_register_mci(struct skx_imc
*imc
, struct pci_dev
*pdev
,
392 const char *ctl_name
, const char *mod_str
,
393 get_dimm_config_f get_dimm_config
)
395 struct mem_ctl_info
*mci
;
396 struct edac_mc_layer layers
[2];
400 /* Allocate a new MC control structure */
401 layers
[0].type
= EDAC_MC_LAYER_CHANNEL
;
402 layers
[0].size
= NUM_CHANNELS
;
403 layers
[0].is_virt_csrow
= false;
404 layers
[1].type
= EDAC_MC_LAYER_SLOT
;
405 layers
[1].size
= NUM_DIMMS
;
406 layers
[1].is_virt_csrow
= true;
407 mci
= edac_mc_alloc(imc
->mc
, ARRAY_SIZE(layers
), layers
,
408 sizeof(struct skx_pvt
));
413 edac_dbg(0, "MC#%d: mci = %p\n", imc
->mc
, mci
);
415 /* Associate skx_dev and mci for future usage */
420 mci
->ctl_name
= kasprintf(GFP_KERNEL
, "%s#%d IMC#%d", ctl_name
,
421 imc
->node_id
, imc
->lmc
);
422 if (!mci
->ctl_name
) {
427 mci
->mtype_cap
= MEM_FLAG_DDR4
| MEM_FLAG_NVDIMM
;
428 mci
->edac_ctl_cap
= EDAC_FLAG_NONE
;
429 mci
->edac_cap
= EDAC_FLAG_NONE
;
430 mci
->mod_name
= mod_str
;
431 mci
->dev_name
= pci_name(pdev
);
432 mci
->ctl_page_to_phys
= NULL
;
434 rc
= get_dimm_config(mci
);
438 /* Record ptr to the generic device */
439 mci
->pdev
= &pdev
->dev
;
441 /* Add this new MC control structure to EDAC's list of MCs */
442 if (unlikely(edac_mc_add_mc(mci
))) {
443 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
451 kfree(mci
->ctl_name
);
458 static void skx_unregister_mci(struct skx_imc
*imc
)
460 struct mem_ctl_info
*mci
= imc
->mci
;
465 edac_dbg(0, "MC%d: mci = %p\n", imc
->mc
, mci
);
467 /* Remove MC sysfs nodes */
468 edac_mc_del_mc(mci
->pdev
);
470 edac_dbg(1, "%s: free mci struct\n", mci
->ctl_name
);
471 kfree(mci
->ctl_name
);
475 static void skx_mce_output_error(struct mem_ctl_info
*mci
,
477 struct decoded_addr
*res
)
479 enum hw_event_mc_err_type tp_event
;
481 bool ripv
= GET_BITFIELD(m
->mcgstatus
, 0, 0);
482 bool overflow
= GET_BITFIELD(m
->status
, 62, 62);
483 bool uncorrected_error
= GET_BITFIELD(m
->status
, 61, 61);
486 u32 core_err_cnt
= GET_BITFIELD(m
->status
, 38, 52);
487 u32 mscod
= GET_BITFIELD(m
->status
, 16, 31);
488 u32 errcode
= GET_BITFIELD(m
->status
, 0, 15);
489 u32 optypenum
= GET_BITFIELD(m
->status
, 4, 6);
491 recoverable
= GET_BITFIELD(m
->status
, 56, 56);
493 if (uncorrected_error
) {
496 tp_event
= HW_EVENT_ERR_FATAL
;
498 tp_event
= HW_EVENT_ERR_UNCORRECTED
;
501 tp_event
= HW_EVENT_ERR_CORRECTED
;
505 * According to Intel Architecture spec vol 3B,
506 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
507 * memory errors should fit one of these masks:
508 * 000f 0000 1mmm cccc (binary)
509 * 000f 0010 1mmm cccc (binary) [RAM used as cache]
511 * f = Correction Report Filtering Bit. If 1, subsequent errors
515 * If the mask doesn't match, report an error to the parsing logic
517 if (!((errcode
& 0xef80) == 0x80 || (errcode
& 0xef80) == 0x280)) {
518 optype
= "Can't parse: it is not a mem";
522 optype
= "generic undef request error";
525 optype
= "memory read error";
528 optype
= "memory write error";
531 optype
= "addr/cmd error";
534 optype
= "memory scrubbing error";
541 if (adxl_component_count
) {
542 len
= snprintf(skx_msg
, MSG_SIZE
, "%s%s err_code:0x%04x:0x%04x %s",
543 overflow
? " OVERFLOW" : "",
544 (uncorrected_error
&& recoverable
) ? " recoverable" : "",
545 mscod
, errcode
, adxl_msg
);
547 len
= snprintf(skx_msg
, MSG_SIZE
,
548 "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
549 overflow
? " OVERFLOW" : "",
550 (uncorrected_error
&& recoverable
) ? " recoverable" : "",
552 res
->socket
, res
->imc
, res
->rank
,
553 res
->bank_group
, res
->bank_address
, res
->row
, res
->column
);
556 if (skx_show_retry_rd_err_log
)
557 skx_show_retry_rd_err_log(res
, skx_msg
+ len
, MSG_SIZE
- len
);
559 edac_dbg(0, "%s\n", skx_msg
);
561 /* Call the helper to output message */
562 edac_mc_handle_error(tp_event
, mci
, core_err_cnt
,
563 m
->addr
>> PAGE_SHIFT
, m
->addr
& ~PAGE_MASK
, 0,
564 res
->channel
, res
->dimm
, -1,
568 int skx_mce_check_error(struct notifier_block
*nb
, unsigned long val
,
571 struct mce
*mce
= (struct mce
*)data
;
572 struct decoded_addr res
;
573 struct mem_ctl_info
*mci
;
576 if (mce
->kflags
& MCE_HANDLED_CEC
)
579 /* ignore unless this is memory related with an address */
580 if ((mce
->status
& 0xefff) >> 7 != 1 || !(mce
->status
& MCI_STATUS_ADDRV
))
583 memset(&res
, 0, sizeof(res
));
584 res
.addr
= mce
->addr
;
586 if (adxl_component_count
) {
587 if (!skx_adxl_decode(&res
))
589 } else if (!skx_decode
|| !skx_decode(&res
)) {
593 mci
= res
.dev
->imc
[res
.imc
].mci
;
598 if (mce
->mcgstatus
& MCG_STATUS_MCIP
)
603 skx_mc_printk(mci
, KERN_DEBUG
, "HANDLING MCE MEMORY ERROR\n");
605 skx_mc_printk(mci
, KERN_DEBUG
, "CPU %d: Machine Check %s: 0x%llx "
606 "Bank %d: 0x%llx\n", mce
->extcpu
, type
,
607 mce
->mcgstatus
, mce
->bank
, mce
->status
);
608 skx_mc_printk(mci
, KERN_DEBUG
, "TSC 0x%llx ", mce
->tsc
);
609 skx_mc_printk(mci
, KERN_DEBUG
, "ADDR 0x%llx ", mce
->addr
);
610 skx_mc_printk(mci
, KERN_DEBUG
, "MISC 0x%llx ", mce
->misc
);
612 skx_mc_printk(mci
, KERN_DEBUG
, "PROCESSOR %u:0x%x TIME %llu SOCKET "
613 "%u APIC 0x%x\n", mce
->cpuvendor
, mce
->cpuid
,
614 mce
->time
, mce
->socketid
, mce
->apicid
);
616 skx_mce_output_error(mci
, mce
, &res
);
618 mce
->kflags
|= MCE_HANDLED_EDAC
;
622 void skx_remove(void)
625 struct skx_dev
*d
, *tmp
;
629 list_for_each_entry_safe(d
, tmp
, &dev_edac_list
, list
) {
631 for (i
= 0; i
< NUM_IMC
; i
++) {
633 skx_unregister_mci(&d
->imc
[i
]);
636 pci_dev_put(d
->imc
[i
].mdev
);
639 iounmap(d
->imc
[i
].mbase
);
641 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
642 if (d
->imc
[i
].chan
[j
].cdev
)
643 pci_dev_put(d
->imc
[i
].chan
[j
].cdev
);
647 pci_dev_put(d
->util_all
);
649 pci_dev_put(d
->sad_all
);
651 pci_dev_put(d
->uracu
);