1 // SPDX-License-Identifier: GPL-2.0
4 * Code shared by both skx_edac and i10nm_edac. Originally split out
5 * from the skx_edac driver.
7 * This file is linked into both the skx_edac and i10nm_edac drivers. To
8 * avoid link errors, this file must behave like a pure library: it must
9 * not include symbols or defines that would otherwise conflict when it is
10 * linked into a module and into a built-in object at the same time. One
11 * example is a reference to the __this_module symbol when this file is
12 * being linked into a built-in object.
14 * Copyright (c) 2018, Intel Corporation.
17 #include <linux/acpi.h>
18 #include <linux/dmi.h>
19 #include <linux/adxl.h>
20 #include <acpi/nfit.h>
22 #include "edac_module.h"
23 #include "skx_common.h"
25 static const char * const component_names
[] = {
26 [INDEX_SOCKET
] = "ProcessorSocketId",
27 [INDEX_MEMCTRL
] = "MemoryControllerId",
28 [INDEX_CHANNEL
] = "ChannelId",
29 [INDEX_DIMM
] = "DimmSlotId",
32 static int component_indices
[ARRAY_SIZE(component_names
)];
33 static int adxl_component_count
;
34 static const char * const *adxl_component_names
;
35 static u64
*adxl_values
;
36 static char *adxl_msg
;
38 static char skx_msg
[MSG_SIZE
];
39 static skx_decode_f skx_decode
;
40 static skx_show_retry_log_f skx_show_retry_rd_err_log
;
41 static u64 skx_tolm
, skx_tohm
;
42 static LIST_HEAD(dev_edac_list
);
44 int __init
skx_adxl_get(void)
46 const char * const *names
;
49 names
= adxl_get_component_names();
51 skx_printk(KERN_NOTICE
, "No firmware support for address translation.\n");
55 for (i
= 0; i
< INDEX_MAX
; i
++) {
56 for (j
= 0; names
[j
]; j
++) {
57 if (!strcmp(component_names
[i
], names
[j
])) {
58 component_indices
[i
] = j
;
67 adxl_component_names
= names
;
69 adxl_component_count
++;
71 adxl_values
= kcalloc(adxl_component_count
, sizeof(*adxl_values
),
74 adxl_component_count
= 0;
78 adxl_msg
= kzalloc(MSG_SIZE
, GFP_KERNEL
);
80 adxl_component_count
= 0;
87 skx_printk(KERN_ERR
, "'%s' is not matched from DSM parameters: ",
89 for (j
= 0; names
[j
]; j
++)
90 skx_printk(KERN_CONT
, "%s ", names
[j
]);
91 skx_printk(KERN_CONT
, "\n");
96 void __exit
skx_adxl_put(void)
102 static bool skx_adxl_decode(struct decoded_addr
*res
)
107 if (res
->addr
>= skx_tohm
|| (res
->addr
>= skx_tolm
&&
108 res
->addr
< BIT_ULL(32))) {
109 edac_dbg(0, "Address 0x%llx out of range\n", res
->addr
);
113 if (adxl_decode(res
->addr
, adxl_values
)) {
114 edac_dbg(0, "Failed to decode 0x%llx\n", res
->addr
);
118 res
->socket
= (int)adxl_values
[component_indices
[INDEX_SOCKET
]];
119 res
->imc
= (int)adxl_values
[component_indices
[INDEX_MEMCTRL
]];
120 res
->channel
= (int)adxl_values
[component_indices
[INDEX_CHANNEL
]];
121 res
->dimm
= (int)adxl_values
[component_indices
[INDEX_DIMM
]];
123 if (res
->imc
> NUM_IMC
- 1) {
124 skx_printk(KERN_ERR
, "Bad imc %d\n", res
->imc
);
128 list_for_each_entry(d
, &dev_edac_list
, list
) {
129 if (d
->imc
[0].src_id
== res
->socket
) {
136 skx_printk(KERN_ERR
, "No device for src_id %d imc %d\n",
137 res
->socket
, res
->imc
);
141 for (i
= 0; i
< adxl_component_count
; i
++) {
142 if (adxl_values
[i
] == ~0x0ull
)
145 len
+= snprintf(adxl_msg
+ len
, MSG_SIZE
- len
, " %s:0x%llx",
146 adxl_component_names
[i
], adxl_values
[i
]);
147 if (MSG_SIZE
- len
<= 0)
154 void skx_set_decode(skx_decode_f decode
, skx_show_retry_log_f show_retry_log
)
157 skx_show_retry_rd_err_log
= show_retry_log
;
160 int skx_get_src_id(struct skx_dev
*d
, int off
, u8
*id
)
164 if (pci_read_config_dword(d
->util_all
, off
, ®
)) {
165 skx_printk(KERN_ERR
, "Failed to read src id\n");
169 *id
= GET_BITFIELD(reg
, 12, 14);
173 int skx_get_node_id(struct skx_dev
*d
, u8
*id
)
177 if (pci_read_config_dword(d
->util_all
, 0xf4, ®
)) {
178 skx_printk(KERN_ERR
, "Failed to read node id\n");
182 *id
= GET_BITFIELD(reg
, 0, 2);
186 static int get_width(u32 mtr
)
188 switch (GET_BITFIELD(mtr
, 8, 9)) {
200 * We use the per-socket device @cfg->decs_did to count how many sockets are present,
201 * and to determine which PCI buses are associated with each socket. Allocate
202 * and build the full list of all the skx_dev structures that we need here.
204 int skx_get_all_bus_mappings(struct res_config
*cfg
, struct list_head
**list
)
206 struct pci_dev
*pdev
, *prev
;
213 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, cfg
->decs_did
, prev
);
217 d
= kzalloc(sizeof(*d
), GFP_KERNEL
);
223 if (pci_read_config_dword(pdev
, cfg
->busno_cfg_offset
, ®
)) {
226 skx_printk(KERN_ERR
, "Failed to read bus idx\n");
230 d
->bus
[0] = GET_BITFIELD(reg
, 0, 7);
231 d
->bus
[1] = GET_BITFIELD(reg
, 8, 15);
232 if (cfg
->type
== SKX
) {
233 d
->seg
= pci_domain_nr(pdev
->bus
);
234 d
->bus
[2] = GET_BITFIELD(reg
, 16, 23);
235 d
->bus
[3] = GET_BITFIELD(reg
, 24, 31);
237 d
->seg
= GET_BITFIELD(reg
, 16, 23);
240 edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
241 d
->bus
[0], d
->bus
[1], d
->bus
[2], d
->bus
[3]);
242 list_add_tail(&d
->list
, &dev_edac_list
);
247 *list
= &dev_edac_list
;
251 int skx_get_hi_lo(unsigned int did
, int off
[], u64
*tolm
, u64
*tohm
)
253 struct pci_dev
*pdev
;
256 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, did
, NULL
);
258 edac_dbg(2, "Can't get tolm/tohm\n");
262 if (pci_read_config_dword(pdev
, off
[0], ®
)) {
263 skx_printk(KERN_ERR
, "Failed to read tolm\n");
268 if (pci_read_config_dword(pdev
, off
[1], ®
)) {
269 skx_printk(KERN_ERR
, "Failed to read lower tohm\n");
274 if (pci_read_config_dword(pdev
, off
[2], ®
)) {
275 skx_printk(KERN_ERR
, "Failed to read upper tohm\n");
278 skx_tohm
|= (u64
)reg
<< 32;
283 edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm
, skx_tohm
);
290 static int skx_get_dimm_attr(u32 reg
, int lobit
, int hibit
, int add
,
291 int minval
, int maxval
, const char *name
)
293 u32 val
= GET_BITFIELD(reg
, lobit
, hibit
);
295 if (val
< minval
|| val
> maxval
) {
296 edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name
, val
, reg
);
/*
 * Per-attribute wrappers around skx_get_dimm_attr(). The arguments after
 * the register value are, in order: lobit, hibit, add, minval, maxval and
 * the attribute name used in the "bad %s" debug message.
 */
#define numrank(reg)	skx_get_dimm_attr((reg), 12, 13, 0, 0, 2, "ranks")
#define numrow(reg)	skx_get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
#define numcol(reg)	skx_get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
306 int skx_get_dimm_info(u32 mtr
, u32 mcmtr
, u32 amap
, struct dimm_info
*dimm
,
307 struct skx_imc
*imc
, int chan
, int dimmno
,
308 struct res_config
*cfg
)
310 int banks
, ranks
, rows
, cols
, npages
;
314 ranks
= numrank(mtr
);
318 if (cfg
->support_ddr5
&& (amap
& 0x8)) {
327 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
329 size
= ((1ull << (rows
+ cols
+ ranks
)) * banks
) >> (20 - 3);
330 npages
= MiB_TO_PAGES(size
);
332 edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
333 imc
->mc
, chan
, dimmno
, size
, npages
,
334 banks
, 1 << ranks
, rows
, cols
);
336 imc
->chan
[chan
].dimms
[dimmno
].close_pg
= GET_BITFIELD(mcmtr
, 0, 0);
337 imc
->chan
[chan
].dimms
[dimmno
].bank_xor_enable
= GET_BITFIELD(mcmtr
, 9, 9);
338 imc
->chan
[chan
].dimms
[dimmno
].fine_grain_bank
= GET_BITFIELD(amap
, 0, 0);
339 imc
->chan
[chan
].dimms
[dimmno
].rowbits
= rows
;
340 imc
->chan
[chan
].dimms
[dimmno
].colbits
= cols
;
342 dimm
->nr_pages
= npages
;
344 dimm
->dtype
= get_width(mtr
);
346 dimm
->edac_mode
= EDAC_SECDED
; /* likely better than this */
347 snprintf(dimm
->label
, sizeof(dimm
->label
), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
348 imc
->src_id
, imc
->lmc
, chan
, dimmno
);
353 int skx_get_nvdimm_info(struct dimm_info
*dimm
, struct skx_imc
*imc
,
354 int chan
, int dimmno
, const char *mod_str
)
361 dev_handle
= ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno
, chan
, imc
->lmc
,
364 smbios_handle
= nfit_get_smbios_id(dev_handle
, &flags
);
365 if (smbios_handle
== -EOPNOTSUPP
) {
366 pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str
);
370 if (smbios_handle
< 0) {
371 skx_printk(KERN_ERR
, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle
);
375 if (flags
& ACPI_NFIT_MEM_MAP_FAILED
) {
376 skx_printk(KERN_ERR
, "NVDIMM ADR=0x%x is not mapped\n", dev_handle
);
380 size
= dmi_memdev_size(smbios_handle
);
382 skx_printk(KERN_ERR
, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
383 dev_handle
, smbios_handle
);
386 dimm
->nr_pages
= size
>> PAGE_SHIFT
;
388 dimm
->dtype
= DEV_UNKNOWN
;
389 dimm
->mtype
= MEM_NVDIMM
;
390 dimm
->edac_mode
= EDAC_SECDED
; /* likely better than this */
392 edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
393 imc
->mc
, chan
, dimmno
, size
>> 20, dimm
->nr_pages
);
395 snprintf(dimm
->label
, sizeof(dimm
->label
), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
396 imc
->src_id
, imc
->lmc
, chan
, dimmno
);
398 return (size
== 0 || size
== ~0ull) ? 0 : 1;
401 int skx_register_mci(struct skx_imc
*imc
, struct pci_dev
*pdev
,
402 const char *ctl_name
, const char *mod_str
,
403 get_dimm_config_f get_dimm_config
,
404 struct res_config
*cfg
)
406 struct mem_ctl_info
*mci
;
407 struct edac_mc_layer layers
[2];
411 /* Allocate a new MC control structure */
412 layers
[0].type
= EDAC_MC_LAYER_CHANNEL
;
413 layers
[0].size
= NUM_CHANNELS
;
414 layers
[0].is_virt_csrow
= false;
415 layers
[1].type
= EDAC_MC_LAYER_SLOT
;
416 layers
[1].size
= NUM_DIMMS
;
417 layers
[1].is_virt_csrow
= true;
418 mci
= edac_mc_alloc(imc
->mc
, ARRAY_SIZE(layers
), layers
,
419 sizeof(struct skx_pvt
));
424 edac_dbg(0, "MC#%d: mci = %p\n", imc
->mc
, mci
);
426 /* Associate skx_dev and mci for future usage */
431 mci
->ctl_name
= kasprintf(GFP_KERNEL
, "%s#%d IMC#%d", ctl_name
,
432 imc
->node_id
, imc
->lmc
);
433 if (!mci
->ctl_name
) {
438 mci
->mtype_cap
= MEM_FLAG_DDR4
| MEM_FLAG_NVDIMM
;
439 if (cfg
->support_ddr5
)
440 mci
->mtype_cap
|= MEM_FLAG_DDR5
;
441 mci
->edac_ctl_cap
= EDAC_FLAG_NONE
;
442 mci
->edac_cap
= EDAC_FLAG_NONE
;
443 mci
->mod_name
= mod_str
;
444 mci
->dev_name
= pci_name(pdev
);
445 mci
->ctl_page_to_phys
= NULL
;
447 rc
= get_dimm_config(mci
, cfg
);
451 /* Record ptr to the generic device */
452 mci
->pdev
= &pdev
->dev
;
454 /* Add this new MC control structure to EDAC's list of MCs */
455 if (unlikely(edac_mc_add_mc(mci
))) {
456 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
464 kfree(mci
->ctl_name
);
471 static void skx_unregister_mci(struct skx_imc
*imc
)
473 struct mem_ctl_info
*mci
= imc
->mci
;
478 edac_dbg(0, "MC%d: mci = %p\n", imc
->mc
, mci
);
480 /* Remove MC sysfs nodes */
481 edac_mc_del_mc(mci
->pdev
);
483 edac_dbg(1, "%s: free mci struct\n", mci
->ctl_name
);
484 kfree(mci
->ctl_name
);
488 static void skx_mce_output_error(struct mem_ctl_info
*mci
,
490 struct decoded_addr
*res
)
492 enum hw_event_mc_err_type tp_event
;
494 bool ripv
= GET_BITFIELD(m
->mcgstatus
, 0, 0);
495 bool overflow
= GET_BITFIELD(m
->status
, 62, 62);
496 bool uncorrected_error
= GET_BITFIELD(m
->status
, 61, 61);
499 u32 core_err_cnt
= GET_BITFIELD(m
->status
, 38, 52);
500 u32 mscod
= GET_BITFIELD(m
->status
, 16, 31);
501 u32 errcode
= GET_BITFIELD(m
->status
, 0, 15);
502 u32 optypenum
= GET_BITFIELD(m
->status
, 4, 6);
504 recoverable
= GET_BITFIELD(m
->status
, 56, 56);
506 if (uncorrected_error
) {
509 tp_event
= HW_EVENT_ERR_UNCORRECTED
;
511 tp_event
= HW_EVENT_ERR_FATAL
;
514 tp_event
= HW_EVENT_ERR_CORRECTED
;
518 * According to Intel Architecture spec vol 3B,
519 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
520 * memory errors should fit one of these masks:
521 * 000f 0000 1mmm cccc (binary)
522 * 000f 0010 1mmm cccc (binary) [RAM used as cache]
524 * f = Correction Report Filtering Bit. If 1, subsequent errors
528 * If the mask doesn't match, report an error to the parsing logic
530 if (!((errcode
& 0xef80) == 0x80 || (errcode
& 0xef80) == 0x280)) {
531 optype
= "Can't parse: it is not a mem";
535 optype
= "generic undef request error";
538 optype
= "memory read error";
541 optype
= "memory write error";
544 optype
= "addr/cmd error";
547 optype
= "memory scrubbing error";
554 if (adxl_component_count
) {
555 len
= snprintf(skx_msg
, MSG_SIZE
, "%s%s err_code:0x%04x:0x%04x %s",
556 overflow
? " OVERFLOW" : "",
557 (uncorrected_error
&& recoverable
) ? " recoverable" : "",
558 mscod
, errcode
, adxl_msg
);
560 len
= snprintf(skx_msg
, MSG_SIZE
,
561 "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
562 overflow
? " OVERFLOW" : "",
563 (uncorrected_error
&& recoverable
) ? " recoverable" : "",
565 res
->socket
, res
->imc
, res
->rank
,
566 res
->bank_group
, res
->bank_address
, res
->row
, res
->column
);
569 if (skx_show_retry_rd_err_log
)
570 skx_show_retry_rd_err_log(res
, skx_msg
+ len
, MSG_SIZE
- len
);
572 edac_dbg(0, "%s\n", skx_msg
);
574 /* Call the helper to output message */
575 edac_mc_handle_error(tp_event
, mci
, core_err_cnt
,
576 m
->addr
>> PAGE_SHIFT
, m
->addr
& ~PAGE_MASK
, 0,
577 res
->channel
, res
->dimm
, -1,
581 int skx_mce_check_error(struct notifier_block
*nb
, unsigned long val
,
584 struct mce
*mce
= (struct mce
*)data
;
585 struct decoded_addr res
;
586 struct mem_ctl_info
*mci
;
589 if (mce
->kflags
& MCE_HANDLED_CEC
)
592 /* ignore unless this is memory related with an address */
593 if ((mce
->status
& 0xefff) >> 7 != 1 || !(mce
->status
& MCI_STATUS_ADDRV
))
596 memset(&res
, 0, sizeof(res
));
597 res
.addr
= mce
->addr
;
599 if (adxl_component_count
) {
600 if (!skx_adxl_decode(&res
))
602 } else if (!skx_decode
|| !skx_decode(&res
)) {
606 mci
= res
.dev
->imc
[res
.imc
].mci
;
611 if (mce
->mcgstatus
& MCG_STATUS_MCIP
)
616 skx_mc_printk(mci
, KERN_DEBUG
, "HANDLING MCE MEMORY ERROR\n");
618 skx_mc_printk(mci
, KERN_DEBUG
, "CPU %d: Machine Check %s: 0x%llx "
619 "Bank %d: 0x%llx\n", mce
->extcpu
, type
,
620 mce
->mcgstatus
, mce
->bank
, mce
->status
);
621 skx_mc_printk(mci
, KERN_DEBUG
, "TSC 0x%llx ", mce
->tsc
);
622 skx_mc_printk(mci
, KERN_DEBUG
, "ADDR 0x%llx ", mce
->addr
);
623 skx_mc_printk(mci
, KERN_DEBUG
, "MISC 0x%llx ", mce
->misc
);
625 skx_mc_printk(mci
, KERN_DEBUG
, "PROCESSOR %u:0x%x TIME %llu SOCKET "
626 "%u APIC 0x%x\n", mce
->cpuvendor
, mce
->cpuid
,
627 mce
->time
, mce
->socketid
, mce
->apicid
);
629 skx_mce_output_error(mci
, mce
, &res
);
631 mce
->kflags
|= MCE_HANDLED_EDAC
;
635 void skx_remove(void)
638 struct skx_dev
*d
, *tmp
;
642 list_for_each_entry_safe(d
, tmp
, &dev_edac_list
, list
) {
644 for (i
= 0; i
< NUM_IMC
; i
++) {
646 skx_unregister_mci(&d
->imc
[i
]);
649 pci_dev_put(d
->imc
[i
].mdev
);
652 iounmap(d
->imc
[i
].mbase
);
654 for (j
= 0; j
< NUM_CHANNELS
; j
++) {
655 if (d
->imc
[i
].chan
[j
].cdev
)
656 pci_dev_put(d
->imc
[i
].chan
[j
].cdev
);
660 pci_dev_put(d
->util_all
);
662 pci_dev_put(d
->sad_all
);
664 pci_dev_put(d
->uracu
);