#include "amd64_edac.h"
#include <asm/amd_nb.h>

static struct edac_pci_ctl_info *pci_ctl;

static int report_gart_errors;
module_param(report_gart_errors, int, 0644);

/*
 * Set by command line parameter. If BIOS has enabled the ECC, this override is
 * cleared to prevent re-enabling the hardware by this driver.
 */
static int ecc_enable_override;
module_param(ecc_enable_override, int, 0644);

static struct msr __percpu *msrs;

static struct ecc_settings **ecc_stngs;

/*
 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
 * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
 * or higher value'.
 *
 * FIXME: Produce a better mapping/linearisation.
 */
static const struct scrubrate {
	u32 scrubval;		/* bit pattern for scrub rate */
	u32 bandwidth;		/* bandwidth consumed (bytes/sec) */
} scrubrates[] = {
	{ 0x01, 1600000000UL},
	/* ... intermediate scrub rate entries elided ... */
	{ 0x00, 0UL},		/* scrubbing off */
};
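/*
 * A note on how this table is used (informal summary, not BKDG text):
 * entries are ordered from the fastest rate (scrubval 0x01, ~1.6 GB/s)
 * down to the slowest, with the { 0x00, 0 } entry meaning "scrubbing
 * off".  The helpers below scan this array linearly, so the descending
 * bandwidth ordering is what makes the "first entry with bandwidth <=
 * requested" search in __set_scrub_rate() pick the closest rate that
 * does not exceed the request.
 */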
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
			       u32 *val, const char *func)
{
	int err = 0;

	err = pci_read_config_dword(pdev, offset, val);
	if (err)
		amd64_warn("%s: error reading F%dx%03x.\n",
			   func, PCI_FUNC(pdev->devfn), offset);

	return err;
}

int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
				u32 val, const char *func)
{
	int err = 0;

	err = pci_write_config_dword(pdev, offset, val);
	if (err)
		amd64_warn("%s: error writing to F%dx%03x.\n",
			   func, PCI_FUNC(pdev->devfn), offset);

	return err;
}
/*
 * Select DCT to which PCI cfg accesses are routed
 */
static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
{
	u32 reg = 0;

	amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
	reg &= (pvt->model == 0x30) ? ~3 : ~1;
	reg |= dct;
	amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
}
/*
 * Depending on the family, F2 DCT reads need special handling:
 *
 * K8: has a single DCT only and no address offsets >= 0x100
 *
 * F10h: each DCT has its own set of regs
 *
 * F16h: has only 1 DCT
 *
 * F15h: we select which DCT we access using F1x10C[DctCfgSel]
 */
static inline int amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct,
					 int offset, u32 *val)
{
	switch (pvt->fam) {
	case 0xf:
		if (dct || offset >= 0x100)
			return -EINVAL;
		break;
	case 0x10:
		if (dct) {
			/*
			 * Note: If ganging is enabled, barring the regs
			 * F2x[1,0]98 and F2x[1,0]9C; reads to F2x1xx
			 * return 0. (cf. Section 2.8.1 F10h BKDG)
			 */
			if (dct_ganging_enabled(pvt))
				return 0;

			offset += 0x100;
		}
		break;
	case 0x15:
		/*
		 * F15h: F2x1xx addresses do not map explicitly to DCT1.
		 * We should select which DCT we access using F1x10C[DctCfgSel]
		 */
		dct = (dct && pvt->model == 0x30) ? 3 : dct;
		f15h_select_dct(pvt, dct);
		break;
	default:
		break;
	}

	return amd64_read_pci_cfg(pvt->F2, offset, val);
}
/*
 * Memory scrubber control interface. For K8, memory scrubbing is handled by
 * hardware and can involve L2 cache, dcache as well as the main memory. With
 * F10, this is extended to L3 cache scrubbing on CPU models sporting that
 * functionality.
 *
 * This causes the "units" for the scrubbing speed to vary from 64 byte blocks
 * (dram) over to cache lines. This is nasty, so we will use bandwidth in
 * bytes/sec for the setting.
 *
 * Currently, we only do dram scrubbing. If the scrubbing is done in software on
 * other archs, we might not have access to the caches directly.
 */

static inline void __f17h_set_scrubval(struct amd64_pvt *pvt, u32 scrubval)
{
	/*
	 * Fam17h supports scrub values between 0x5 and 0x14. Also, the values
	 * are shifted down by 0x5, so scrubval 0x5 is written to the register
	 * as 0x0, scrubval 0x6 as 0x1, etc.
	 */
	if (scrubval >= 0x5 && scrubval <= 0x14) {
		scrubval -= 0x5;
		pci_write_bits32(pvt->F6, F17H_SCR_LIMIT_ADDR, scrubval, 0xF);
		pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 1, 0x1);
	} else {
		pci_write_bits32(pvt->F6, F17H_SCR_BASE_ADDR, 0, 0x1);
	}
}
/*
 * Scan the scrub rate mapping table for a close or matching bandwidth value to
 * issue. If requested is too big, then use last maximum value found.
 */
static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
{
	u32 scrubval;
	int i;

	/*
	 * map the configured rate (new_bw) to a value specific to the AMD64
	 * memory controller and apply to register. Search for the first
	 * bandwidth entry that is greater or equal than the setting requested
	 * and program that. If at last entry, turn off DRAM scrubbing.
	 *
	 * If no suitable bandwidth is found, turn off DRAM scrubbing entirely
	 * by falling back to the last element in scrubrates[].
	 */
	for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) {
		/*
		 * skip scrub rates which aren't recommended
		 * (see F10 BKDG, F3x58)
		 */
		if (scrubrates[i].scrubval < min_rate)
			continue;

		if (scrubrates[i].bandwidth <= new_bw)
			break;
	}

	scrubval = scrubrates[i].scrubval;

	if (pvt->fam == 0x17) {
		__f17h_set_scrubval(pvt, scrubval);
	} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
		f15h_select_dct(pvt, 0);
		pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
		f15h_select_dct(pvt, 1);
		pci_write_bits32(pvt->F2, F15H_M60H_SCRCTRL, scrubval, 0x001F);
	} else {
		pci_write_bits32(pvt->F3, SCRCTRL, scrubval, 0x001F);
	}

	return scrubrates[i].bandwidth;
}
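/*
 * Worked example (illustrative): requesting new_bw = 2000000000 bytes/sec
 * stops the loop at the first table entry (scrubval 0x01, 1.6 GB/s),
 * because that is the first bandwidth not larger than the request; a very
 * small request falls through to the slowest valid entry instead.  The
 * value actually programmed is always one of the discrete rates from
 * scrubrates[], never the raw requested number.
 */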
static int set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u32 min_scrubrate = 0x5;

	if (pvt->fam == 0x15) {
		/* Erratum #505 */
		if (pvt->model < 0x10)
			f15h_select_dct(pvt, 0);

		if (pvt->model == 0x60)
			min_scrubrate = 0x6;
	}
	return __set_scrub_rate(pvt, bw, min_scrubrate);
}
static int get_scrub_rate(struct mem_ctl_info *mci)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	int i, retval = -EINVAL;
	u32 scrubval = 0;

	switch (pvt->fam) {
	case 0x15:
		/* Erratum #505 */
		if (pvt->model < 0x10)
			f15h_select_dct(pvt, 0);

		if (pvt->model == 0x60)
			amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
		break;
	case 0x17:
		amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
		if (scrubval & BIT(0))
			amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
		break;
	default:
		amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
		break;
	}

	scrubval = scrubval & 0x001F;

	for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
		if (scrubrates[i].scrubval == scrubval) {
			retval = scrubrates[i].bandwidth;
			break;
		}
	}
	return retval;
}
/*
 * returns true if the SysAddr given by sys_addr matches the
 * DRAM base/limit associated with node_id
 */
static bool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid)
{
	u64 addr;

	/* The K8 treats this as a 40-bit value.  However, bits 63-40 will be
	 * all ones if the most significant implemented address bit is 1.
	 * Here we discard bits 63-40.  See section 3.4.2 of AMD publication
	 * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1
	 * Application Programming.
	 */
	addr = sys_addr & 0x000000ffffffffffull;

	return ((addr >= get_dram_base(pvt, nid)) &&
		(addr <= get_dram_limit(pvt, nid)));
}
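/*
 * Example (illustrative): a sign-extended MCA address such as
 * 0xffffff8012345678 is reduced by the 40-bit mask above to 0x8012345678
 * before being compared against this node's DRAM base/limit pair.
 */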
/*
 * Attempt to map a SysAddr to a node. On success, return a pointer to the
 * mem_ctl_info structure for the node that the SysAddr maps to.
 *
 * On failure, return NULL.
 */
static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
						u64 sys_addr)
{
	struct amd64_pvt *pvt;
	u8 node_id;
	u32 intlv_en, bits;

	/*
	 * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section
	 * 3.4.4.2) registers to map the SysAddr to a node ID.
	 */
	pvt = mci->pvt_info;

	/*
	 * The value of this field should be the same for all DRAM Base
	 * registers.  Therefore we arbitrarily choose to read it from the
	 * register for node 0.
	 */
	intlv_en = dram_intlv_en(pvt, 0);

	if (intlv_en == 0) {
		for (node_id = 0; node_id < DRAM_RANGES; node_id++) {
			if (base_limit_match(pvt, sys_addr, node_id))
				goto found;
		}
		goto err_no_match;
	}

	if (unlikely((intlv_en != 0x01) &&
		     (intlv_en != 0x03) &&
		     (intlv_en != 0x07))) {
		amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
		return NULL;
	}

	bits = (((u32) sys_addr) >> 12) & intlv_en;

	for (node_id = 0; ; ) {
		if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits)
			break;	/* intlv_sel field matches */

		if (++node_id >= DRAM_RANGES)
			goto err_no_match;
	}

	/* sanity test for sys_addr */
	if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
		amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address "
			   "range for node %d with node interleaving enabled.\n",
			   __func__, sys_addr, node_id);
		return NULL;
	}

found:
	return edac_mc_find((int)node_id);

err_no_match:
	edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
		 (unsigned long)sys_addr);

	return NULL;
}
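/*
 * Interleave example (illustrative): with intlv_en == 0x03 (four-node
 * interleave), bits [13:12] of the SysAddr are compared against each
 * node's IntlvSel value, so consecutive 4 KiB pages rotate across the
 * interleaved nodes before the base/limit sanity check above runs.
 */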
/*
 * compute the CS base address of the @csrow on the DRAM controller @dct.
 * For details see F2x[5C:40] in the processor's BKDG
 */
static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
				 u64 *base, u64 *mask)
{
	u64 csbase, csmask, base_bits, mask_bits;
	u8 addr_shift;

	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
		csbase		= pvt->csels[dct].csbases[csrow];
		csmask		= pvt->csels[dct].csmasks[csrow];
		base_bits	= GENMASK_ULL(31, 21) | GENMASK_ULL(15, 9);
		mask_bits	= GENMASK_ULL(29, 21) | GENMASK_ULL(15, 9);
		addr_shift	= 4;

	/*
	 * F16h and F15h, models 30h and later need two addr_shift values:
	 * 8 for high and 6 for low (cf. F16h BKDG).
	 */
	} else if (pvt->fam == 0x16 ||
		   (pvt->fam == 0x15 && pvt->model >= 0x30)) {
		csbase		= pvt->csels[dct].csbases[csrow];
		csmask		= pvt->csels[dct].csmasks[csrow >> 1];

		*base  = (csbase & GENMASK_ULL(15, 5)) << 6;
		*base |= (csbase & GENMASK_ULL(30, 19)) << 8;

		*mask = ~0ULL;
		/* poke holes for the csmask */
		*mask &= ~((GENMASK_ULL(15, 5) << 6) |
			   (GENMASK_ULL(30, 19) << 8));

		*mask |= (csmask & GENMASK_ULL(15, 5)) << 6;
		*mask |= (csmask & GENMASK_ULL(30, 19)) << 8;

		return;
	} else {
		csbase		= pvt->csels[dct].csbases[csrow];
		csmask		= pvt->csels[dct].csmasks[csrow >> 1];
		addr_shift	= 8;

		if (pvt->fam == 0x15)
			base_bits = mask_bits =
				GENMASK_ULL(30, 19) | GENMASK_ULL(13, 5);
		else
			base_bits = mask_bits =
				GENMASK_ULL(28, 19) | GENMASK_ULL(13, 5);
	}

	*base = (csbase & base_bits) << addr_shift;

	*mask = ~0ULL;
	/* poke holes for the csmask */
	*mask &= ~(mask_bits << addr_shift);
	/* OR them in */
	*mask |= (csmask & mask_bits) << addr_shift;
}
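/*
 * Informal illustration of the layout above: with addr_shift == 8 the
 * DCS register fields at bits [28:19] and [13:5] land at DRAM address
 * bits [36:27] and [21:13].  *mask starts as all-ones, the maskable
 * positions are punched out and the chip select's own DCSM bits are
 * OR-ed back in; callers such as input_addr_to_csrow() then invert the
 * result so the base comparison ignores exactly the bits the mask
 * register marks as "don't care".
 */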
#define for_each_chip_select(i, dct, pvt) \
	for (i = 0; i < pvt->csels[dct].b_cnt; i++)

#define chip_select_base(i, dct, pvt) \
	pvt->csels[dct].csbases[i]

#define for_each_chip_select_mask(i, dct, pvt) \
	for (i = 0; i < pvt->csels[dct].m_cnt; i++)
/*
 * @input_addr is an InputAddr associated with the node given by mci. Return the
 * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
 */
static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
{
	struct amd64_pvt *pvt;
	int csrow;
	u64 base, mask;

	pvt = mci->pvt_info;

	for_each_chip_select(csrow, 0, pvt) {
		if (!csrow_enabled(csrow, 0, pvt))
			continue;

		get_cs_base_and_mask(pvt, csrow, 0, &base, &mask);

		mask = ~mask;

		if ((input_addr & mask) == (base & mask)) {
			edac_dbg(2, "InputAddr 0x%lx matches csrow %d (node %d)\n",
				 (unsigned long)input_addr, csrow,
				 pvt->mc_node_id);

			return csrow;
		}
	}
	edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
		 (unsigned long)input_addr, pvt->mc_node_id);

	return -1;
}
/*
 * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094)
 * for the node represented by mci. Info is passed back in *hole_base,
 * *hole_offset, and *hole_size.  Function returns 0 if info is valid or 1 if
 * info is invalid.  Info may be invalid for either of the following reasons:
 *
 * - The revision of the node is not E or greater.  In this case, the DRAM Hole
 *   Address Register does not exist.
 *
 * - The DramHoleValid bit is cleared in the DRAM Hole Address Register,
 *   indicating that its contents are not valid.
 *
 * The values passed back in *hole_base, *hole_offset, and *hole_size are
 * complete 32-bit values despite the fact that the bitfields in the DHAR
 * only represent bits 31-24 of the base and offset values.
 */
int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
			     u64 *hole_offset, u64 *hole_size)
{
	struct amd64_pvt *pvt = mci->pvt_info;

	/* only revE and later have the DRAM Hole Address Register */
	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) {
		edac_dbg(1, "  revision %d for node %d does not support DHAR\n",
			 pvt->ext_model, pvt->mc_node_id);
		return 1;
	}

	/* valid for Fam10h and above */
	if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this system\n");
		return 1;
	}

	if (!dhar_valid(pvt)) {
		edac_dbg(1, "  Dram Memory Hoisting is DISABLED on this node %d\n",
			 pvt->mc_node_id);
		return 1;
	}

	/* This node has Memory Hoisting */

	/* +------------------+--------------------+--------------------+-----
	 * | memory           | DRAM hole          | relocated          |
	 * | [0, (x - 1)]     | [x, 0xffffffff]    | addresses from     |
	 * |                  |                    | [0x100000000,      |
	 * |                  |                    |  (0x100000000+     |
	 * |                  |                    |   (0xffffffff-x))] |
	 * +------------------+--------------------+--------------------+-----
	 *
	 * Above is a diagram of physical memory showing the DRAM hole and the
	 * relocated addresses from the DRAM hole.  As shown, the DRAM hole
	 * starts at address x (the base address) and extends through address
	 * 0xffffffff.  The DRAM Hole Address Register (DHAR) relocates the
	 * addresses in the hole so that they start at 0x100000000.
	 */

	*hole_base = dhar_base(pvt);
	*hole_size = (1ULL << 32) - *hole_base;

	*hole_offset = (pvt->fam > 0xf) ? f10_dhar_offset(pvt)
					: k8_dhar_offset(pvt);

	edac_dbg(1, "  DHAR info for node %d base 0x%lx offset 0x%lx size 0x%lx\n",
		 pvt->mc_node_id, (unsigned long)*hole_base,
		 (unsigned long)*hole_offset, (unsigned long)*hole_size);

	return 0;
}
EXPORT_SYMBOL_GPL(amd64_get_dram_hole_info);
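/*
 * Numeric example (illustrative): if the hole starts at 0xc0000000, then
 * *hole_base = 0xc0000000 and *hole_size = 0x100000000 - 0xc0000000 =
 * 0x40000000 (1 GiB); the DRAM behind the hole appears at SysAddrs from
 * 0x100000000 upwards, and *hole_offset is what the SysAddr-to-DramAddr
 * translation below subtracts to undo the hoisting.
 */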
/*
 * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
 * assumed that sys_addr maps to the node given by mci.
 *
 * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
 * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
 * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
 * then it is also involved in translating a SysAddr to a DramAddr. Sections
 * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
 * These parts of the documentation are unclear. I interpret them as follows:
 *
 * When node n receives a SysAddr, it processes the SysAddr as follows:
 *
 * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
 *    Limit registers for node n. If the SysAddr is not within the range
 *    specified by the base and limit values, then node n ignores the Sysaddr
 *    (since it does not map to node n). Otherwise continue to step 2 below.
 *
 * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
 *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
 *    the range of relocated addresses (starting at 0x100000000) from the DRAM
 *    hole. If not, skip to step 3 below. Else get the value of the
 *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
 *    offset defined by this value from the SysAddr.
 *
 * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
 *    Base register for node n. To obtain the DramAddr, subtract the base
 *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
 */
static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
	int ret;

	dram_base = get_dram_base(pvt, pvt->mc_node_id);

	ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
				       &hole_size);
	if (!ret) {
		if ((sys_addr >= (1ULL << 32)) &&
		    (sys_addr < ((1ULL << 32) + hole_size))) {
			/* use DHAR to translate SysAddr to DramAddr */
			dram_addr = sys_addr - hole_offset;

			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
				 (unsigned long)sys_addr,
				 (unsigned long)dram_addr);

			return dram_addr;
		}
	}

	/*
	 * Translate the SysAddr to a DramAddr as shown near the start of
	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
	 * Programmer's Manual Volume 1 Application Programming.
	 */
	dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;

	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
		 (unsigned long)sys_addr, (unsigned long)dram_addr);

	return dram_addr;
}
/*
 * @intlv_en is the value of the IntlvEn field from a DRAM Base register
 * (section 3.4.4.1).  Return the number of bits from a SysAddr that are used
 * for node interleaving.
 */
static int num_node_interleave_bits(unsigned intlv_en)
{
	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
	int n;

	BUG_ON(intlv_en > 7);
	n = intlv_shift_table[intlv_en];
	return n;
}
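/*
 * For illustration: intlv_en values of 1, 3 and 7 correspond to 2-, 4-
 * and 8-node interleaving and map to 1, 2 and 3 interleave bits; any
 * other value means node interleaving is not in use.
 */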
/* Translate the DramAddr given by @dram_addr to an InputAddr. */
static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
{
	struct amd64_pvt *pvt;
	int intlv_shift;
	u64 input_addr;

	pvt = mci->pvt_info;

	/*
	 * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E)
	 * concerning translating a DramAddr to an InputAddr.
	 */
	intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
	input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
		      (dram_addr & 0xfff);

	edac_dbg(2, "  Intlv Shift=%d DramAddr=0x%lx maps to InputAddr=0x%lx\n",
		 intlv_shift, (unsigned long)dram_addr,
		 (unsigned long)input_addr);

	return input_addr;
}
/*
 * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
 * assumed that @sys_addr maps to the node given by mci.
 */
static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	u64 input_addr;

	input_addr =
	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));

	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
		 (unsigned long)sys_addr, (unsigned long)input_addr);

	return input_addr;
}
/* Map the Error address to a PAGE and PAGE OFFSET. */
static inline void error_address_to_page_and_offset(u64 error_address,
						    struct err_info *err)
{
	err->page = (u32) (error_address >> PAGE_SHIFT);
	err->offset = ((u32) error_address) & ~PAGE_MASK;
}
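/*
 * Example (with the usual 4 KiB pages, PAGE_SHIFT == 12): an error
 * address of 0x12345678 is reported as page 0x12345, offset 0x678.
 */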
/*
 * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
 * of a node that detected an ECC memory error.  mci represents the node that
 * the error address maps to (possibly different from the node that detected
 * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
 * error.
 */
static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
{
	int csrow;

	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));

	if (csrow == -1)
		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
				  "address 0x%lx\n", (unsigned long)sys_addr);
	return csrow;
}

static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
/*
 * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
 * are ECC capable.
 */
static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
{
	unsigned long edac_cap = EDAC_FLAG_NONE;
	u8 bit;

	if (pvt->umc) {
		u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;

		for (i = 0; i < NUM_UMCS; i++) {
			if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
				continue;

			umc_en_mask |= BIT(i);

			/* UMC Configuration bit 12 (DimmEccEn) */
			if (pvt->umc[i].umc_cfg & BIT(12))
				dimm_ecc_en_mask |= BIT(i);
		}

		if (umc_en_mask == dimm_ecc_en_mask)
			edac_cap = EDAC_FLAG_SECDED;
	} else {
		bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F)
			? 19
			: 17;

		if (pvt->dclr0 & BIT(bit))
			edac_cap = EDAC_FLAG_SECDED;
	}

	return edac_cap;
}
static void debug_display_dimm_sizes(struct amd64_pvt *, u8);

static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
{
	edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr);

	if (pvt->dram_type == MEM_LRDDR3) {
		u32 dcsm = pvt->csels[chan].csmasks[0];
		/*
		 * It's assumed all LRDIMMs in a DCT are going to be of
		 * same 'type' until proven otherwise. So, use a cs
		 * value of '0' here to get dcsm value.
		 */
		edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
	}

	edac_dbg(1, "All DIMMs support ECC:%s\n",
		 (dclr & BIT(19)) ? "yes" : "no");

	edac_dbg(1, "  PAR/ERR parity: %s\n",
		 (dclr & BIT(8)) ? "enabled" : "disabled");

	if (pvt->fam == 0x10)
		edac_dbg(1, "  DCT 128bit mode width: %s\n",
			 (dclr & BIT(11)) ? "128b" : "64b");

	edac_dbg(1, "  x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
		 (dclr & BIT(12)) ? "yes" : "no",
		 (dclr & BIT(13)) ? "yes" : "no",
		 (dclr & BIT(14)) ? "yes" : "no",
		 (dclr & BIT(15)) ? "yes" : "no");
}
static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
{
	int dimm, size0, size1, cs0, cs1;

	edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);

	for (dimm = 0; dimm < 4; dimm++) {
		size0 = 0;
		cs0 = dimm * 2;

		if (csrow_enabled(cs0, ctrl, pvt))
			size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0);

		size1 = 0;
		cs1 = dimm * 2 + 1;

		if (csrow_enabled(cs1, ctrl, pvt))
			size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);

		amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
			   cs0, size0, cs1, size1);
	}
}
static void __dump_misc_regs_df(struct amd64_pvt *pvt)
{
	struct amd64_umc *umc;
	u32 i, tmp, umc_base;

	for (i = 0; i < NUM_UMCS; i++) {
		umc_base = get_umc_base(i);
		umc = &pvt->umc[i];

		edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg);
		edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
		edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
		edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);

		amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ECC_BAD_SYMBOL, &tmp);
		edac_dbg(1, "UMC%d ECC bad symbol: 0x%x\n", i, tmp);

		amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_UMC_CAP, &tmp);
		edac_dbg(1, "UMC%d UMC cap: 0x%x\n", i, tmp);
		edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi);

		edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n",
			 i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no",
			    (umc->umc_cap_hi & BIT(31)) ? "yes" : "no");
		edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n",
			 i, (umc->umc_cfg & BIT(12)) ? "yes" : "no");
		edac_dbg(1, "UMC%d x4 DIMMs present: %s\n",
			 i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no");
		edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
			 i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");

		if (pvt->dram_type == MEM_LRDDR4) {
			amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
			edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
				 i, 1 << ((tmp >> 4) & 0x3));
		}

		debug_display_dimm_sizes_df(pvt, i);
	}

	edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n",
		 pvt->dhar, dhar_base(pvt));
}
/* Display and decode various NB registers for debug purposes. */
static void __dump_misc_regs(struct amd64_pvt *pvt)
{
	edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);

	edac_dbg(1, "  NB two channel DRAM capable: %s\n",
		 (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");

	edac_dbg(1, "  ECC capable: %s, ChipKill ECC capable: %s\n",
		 (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
		 (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");

	debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);

	edac_dbg(1, "F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare);

	edac_dbg(1, "F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, offset: 0x%08x\n",
		 pvt->dhar, dhar_base(pvt),
		 (pvt->fam == 0xf) ? k8_dhar_offset(pvt)
				   : f10_dhar_offset(pvt));

	debug_display_dimm_sizes(pvt, 0);

	/* everything below this point is Fam10h and above */
	if (pvt->fam == 0xf)
		return;

	debug_display_dimm_sizes(pvt, 1);

	/* Only if NOT ganged does dclr1 have valid info */
	if (!dct_ganging_enabled(pvt))
		debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
}
/* Display and decode various NB registers for debug purposes. */
static void dump_misc_regs(struct amd64_pvt *pvt)
{
	if (pvt->umc)
		__dump_misc_regs_df(pvt);
	else
		__dump_misc_regs(pvt);

	edac_dbg(1, "  DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");

	amd64_info("using %s syndromes.\n",
		   ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
}
/*
 * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
 */
static void prep_chip_selects(struct amd64_pvt *pvt)
{
	if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
	} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
	} else {
		pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
		pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
	}
}
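/*
 * Quick reference for the counts set above: pre-revF K8 exposes 8 DCS
 * base and 8 DCS mask registers per DCT; F15h model 30h has 4 bases and
 * 2 masks; everything else handled here uses 8 bases with 4 masks, i.e.
 * one mask register is shared by a pair of chip selects (which is why
 * other code indexes csmasks[] with "csrow >> 1").
 */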
/*
 * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
 */
static void read_dct_base_mask(struct amd64_pvt *pvt)
{
	int base_reg0, base_reg1, mask_reg0, mask_reg1, cs;

	prep_chip_selects(pvt);

	if (pvt->umc) {
		base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR;
		base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR;
		mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK;
		mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK;
	} else {
		base_reg0 = DCSB0;
		base_reg1 = DCSB1;
		mask_reg0 = DCSM0;
		mask_reg1 = DCSM1;
	}

	for_each_chip_select(cs, 0, pvt) {
		int reg0   = base_reg0 + (cs * 4);
		int reg1   = base_reg1 + (cs * 4);
		u32 *base0 = &pvt->csels[0].csbases[cs];
		u32 *base1 = &pvt->csels[1].csbases[cs];

		if (pvt->umc) {
			if (!amd_smn_read(pvt->mc_node_id, reg0, base0))
				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: 0x%x\n",
					 cs, *base0, reg0);

			if (!amd_smn_read(pvt->mc_node_id, reg1, base1))
				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: 0x%x\n",
					 cs, *base1, reg1);
		} else {
			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
				edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
					 cs, *base0, reg0);

			if (pvt->fam == 0xf)
				continue;

			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
				edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
					 cs, *base1, (pvt->fam == 0x10) ? reg1
									: reg0);
		}
	}

	for_each_chip_select_mask(cs, 0, pvt) {
		int reg0   = mask_reg0 + (cs * 4);
		int reg1   = mask_reg1 + (cs * 4);
		u32 *mask0 = &pvt->csels[0].csmasks[cs];
		u32 *mask1 = &pvt->csels[1].csmasks[cs];

		if (pvt->umc) {
			if (!amd_smn_read(pvt->mc_node_id, reg0, mask0))
				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: 0x%x\n",
					 cs, *mask0, reg0);

			if (!amd_smn_read(pvt->mc_node_id, reg1, mask1))
				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: 0x%x\n",
					 cs, *mask1, reg1);
		} else {
			if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
				edac_dbg(0, "    DCSM0[%d]=0x%08x reg: F2x%x\n",
					 cs, *mask0, reg0);

			if (pvt->fam == 0xf)
				continue;

			if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
				edac_dbg(0, "    DCSM1[%d]=0x%08x reg: F2x%x\n",
					 cs, *mask1, (pvt->fam == 0x10) ? reg1
									: reg0);
		}
	}
}
static void determine_memory_type(struct amd64_pvt *pvt)
{
	u32 dram_ctrl, dcsm;

	switch (pvt->fam) {
	case 0xf:
		if (pvt->ext_model >= K8_REV_F)
			goto ddr3;

		pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
		return;
	case 0x10:
		if (pvt->dchr0 & DDR3_MODE)
			goto ddr3;

		pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2;
		return;
	case 0x15:
		if (pvt->model < 0x60)
			goto ddr3;

		/*
		 * Model 0x60h needs special handling:
		 *
		 * We use a Chip Select value of '0' to obtain dcsm.
		 * Theoretically, it is possible to populate LRDIMMs of different
		 * 'Rank' value on a DCT. But this is not the common case. So,
		 * it's reasonable to assume all DIMMs are going to be of same
		 * 'type' until proven otherwise.
		 */
		amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl);
		dcsm = pvt->csels[0].csmasks[0];

		if (((dram_ctrl >> 8) & 0x7) == 0x2)
			pvt->dram_type = MEM_DDR4;
		else if (pvt->dclr0 & BIT(16))
			pvt->dram_type = MEM_DDR3;
		else if (dcsm & 0x3)
			pvt->dram_type = MEM_LRDDR3;
		else
			pvt->dram_type = MEM_RDDR3;

		return;
	case 0x16:
		goto ddr3;
	case 0x17:
		if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
			pvt->dram_type = MEM_LRDDR4;
		else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
			pvt->dram_type = MEM_RDDR4;
		else
			pvt->dram_type = MEM_DDR4;
		return;
	default:
		WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
		pvt->dram_type = MEM_EMPTY;
	}
	return;

ddr3:
	pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3;
}
/* Get the number of DCT channels the memory controller is using. */
static int k8_early_channel_count(struct amd64_pvt *pvt)
{
	int flag;

	if (pvt->ext_model >= K8_REV_F)
		/* RevF (NPT) and later */
		flag = pvt->dclr0 & WIDTH_128;
	else
		/* RevE and earlier */
		flag = pvt->dclr0 & REVE_WIDTH_128;

	return (flag) ? 2 : 1;
}
/* On F10h and later ErrAddr is MC4_ADDR[47:1] */
static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
{
	u16 mce_nid = amd_get_nb_id(m->extcpu);
	struct mem_ctl_info *mci;
	u8 start_bit = 1;
	u8 end_bit   = 47;
	u64 addr;

	mci = edac_mc_find(mce_nid);
	if (!mci)
		return 0;

	pvt = mci->pvt_info;

	if (pvt->fam == 0xf) {
		start_bit = 3;
		end_bit   = 39;
	}

	addr = m->addr & GENMASK_ULL(end_bit, start_bit);

	/*
	 * Erratum 637 workaround
	 */
	if (pvt->fam == 0x15) {
		u64 cc6_base, tmp_addr;
		u32 tmp;
		u16 intlv_en;

		if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
			return addr;

		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
		intlv_en = tmp >> 21 & 0x7;

		/* add [47:27] + 3 trailing bits */
		cc6_base  = (tmp & GENMASK_ULL(20, 0)) << 3;

		/* reverse and add DramIntlvEn */
		cc6_base |= intlv_en ^ 0x7;

		/* pin at [47:24] */
		cc6_base <<= 24;

		if (!intlv_en)
			return cc6_base | (addr & GENMASK_ULL(23, 0));

		amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_BASE, &tmp);

		tmp_addr = (addr & GENMASK_ULL(23, 12)) << __fls(intlv_en + 1);

		/* OR DramIntlvSel into bits [14:12] */
		tmp_addr |= (tmp & GENMASK_ULL(23, 21)) >> 9;

		/* add remaining [11:0] bits from original MC4_ADDR */
		tmp_addr |= addr & GENMASK_ULL(11, 0);

		return cc6_base | tmp_addr;
	}

	return addr;
}
1149 static struct pci_dev
*pci_get_related_function(unsigned int vendor
,
1150 unsigned int device
,
1151 struct pci_dev
*related
)
1153 struct pci_dev
*dev
= NULL
;
1155 while ((dev
= pci_get_device(vendor
, device
, dev
))) {
1156 if (pci_domain_nr(dev
->bus
) == pci_domain_nr(related
->bus
) &&
1157 (dev
->bus
->number
== related
->bus
->number
) &&
1158 (PCI_SLOT(dev
->devfn
) == PCI_SLOT(related
->devfn
)))
1165 static void read_dram_base_limit_regs(struct amd64_pvt
*pvt
, unsigned range
)
1167 struct amd_northbridge
*nb
;
1168 struct pci_dev
*f1
= NULL
;
1169 unsigned int pci_func
;
1170 int off
= range
<< 3;
1173 amd64_read_pci_cfg(pvt
->F1
, DRAM_BASE_LO
+ off
, &pvt
->ranges
[range
].base
.lo
);
1174 amd64_read_pci_cfg(pvt
->F1
, DRAM_LIMIT_LO
+ off
, &pvt
->ranges
[range
].lim
.lo
);
1176 if (pvt
->fam
== 0xf)
1179 if (!dram_rw(pvt
, range
))
1182 amd64_read_pci_cfg(pvt
->F1
, DRAM_BASE_HI
+ off
, &pvt
->ranges
[range
].base
.hi
);
1183 amd64_read_pci_cfg(pvt
->F1
, DRAM_LIMIT_HI
+ off
, &pvt
->ranges
[range
].lim
.hi
);
1185 /* F15h: factor in CC6 save area by reading dst node's limit reg */
1186 if (pvt
->fam
!= 0x15)
1189 nb
= node_to_amd_nb(dram_dst_node(pvt
, range
));
1193 if (pvt
->model
== 0x60)
1194 pci_func
= PCI_DEVICE_ID_AMD_15H_M60H_NB_F1
;
1195 else if (pvt
->model
== 0x30)
1196 pci_func
= PCI_DEVICE_ID_AMD_15H_M30H_NB_F1
;
1198 pci_func
= PCI_DEVICE_ID_AMD_15H_NB_F1
;
1200 f1
= pci_get_related_function(nb
->misc
->vendor
, pci_func
, nb
->misc
);
1204 amd64_read_pci_cfg(f1
, DRAM_LOCAL_NODE_LIM
, &llim
);
1206 pvt
->ranges
[range
].lim
.lo
&= GENMASK_ULL(15, 0);
1208 /* {[39:27],111b} */
1209 pvt
->ranges
[range
].lim
.lo
|= ((llim
& 0x1fff) << 3 | 0x7) << 16;
1211 pvt
->ranges
[range
].lim
.hi
&= GENMASK_ULL(7, 0);
1214 pvt
->ranges
[range
].lim
.hi
|= llim
>> 13;
1219 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info
*mci
, u64 sys_addr
,
1220 struct err_info
*err
)
1222 struct amd64_pvt
*pvt
= mci
->pvt_info
;
1224 error_address_to_page_and_offset(sys_addr
, err
);
1227 * Find out which node the error address belongs to. This may be
1228 * different from the node that detected the error.
1230 err
->src_mci
= find_mc_by_sys_addr(mci
, sys_addr
);
1231 if (!err
->src_mci
) {
1232 amd64_mc_err(mci
, "failed to map error addr 0x%lx to a node\n",
1233 (unsigned long)sys_addr
);
1234 err
->err_code
= ERR_NODE
;
1238 /* Now map the sys_addr to a CSROW */
1239 err
->csrow
= sys_addr_to_csrow(err
->src_mci
, sys_addr
);
1240 if (err
->csrow
< 0) {
1241 err
->err_code
= ERR_CSROW
;
1245 /* CHIPKILL enabled */
1246 if (pvt
->nbcfg
& NBCFG_CHIPKILL
) {
1247 err
->channel
= get_channel_from_ecc_syndrome(mci
, err
->syndrome
);
1248 if (err
->channel
< 0) {
1250 * Syndrome didn't map, so we don't know which of the
1251 * 2 DIMMs is in error. So we need to ID 'both' of them
1254 amd64_mc_warn(err
->src_mci
, "unknown syndrome 0x%04x - "
1255 "possible error reporting race\n",
1257 err
->err_code
= ERR_CHANNEL
;
1262 * non-chipkill ecc mode
1264 * The k8 documentation is unclear about how to determine the
1265 * channel number when using non-chipkill memory. This method
1266 * was obtained from email communication with someone at AMD.
1267 * (Wish the email was placed in this comment - norsk)
1269 err
->channel
= ((sys_addr
& BIT(3)) != 0);
1273 static int ddr2_cs_size(unsigned i
, bool dct_width
)
1279 else if (!(i
& 0x1))
1282 shift
= (i
+ 1) >> 1;
1284 return 128 << (shift
+ !!dct_width
);
1287 static int k8_dbam_to_chip_select(struct amd64_pvt
*pvt
, u8 dct
,
1288 unsigned cs_mode
, int cs_mask_nr
)
1290 u32 dclr
= dct
? pvt
->dclr1
: pvt
->dclr0
;
1292 if (pvt
->ext_model
>= K8_REV_F
) {
1293 WARN_ON(cs_mode
> 11);
1294 return ddr2_cs_size(cs_mode
, dclr
& WIDTH_128
);
1296 else if (pvt
->ext_model
>= K8_REV_D
) {
1298 WARN_ON(cs_mode
> 10);
1301 * the below calculation, besides trying to win an obfuscated C
1302 * contest, maps cs_mode values to DIMM chip select sizes. The
1305 * cs_mode CS size (mb)
1306 * ======= ============
1319 * Basically, it calculates a value with which to shift the
1320 * smallest CS size of 32MB.
1322 * ddr[23]_cs_size have a similar purpose.
1324 diff
= cs_mode
/3 + (unsigned)(cs_mode
> 5);
1326 return 32 << (cs_mode
- diff
);
1329 WARN_ON(cs_mode
> 6);
1330 return 32 << cs_mode
;
1335 * Get the number of DCT channels in use.
1338 * number of Memory Channels in operation
1340 * contents of the DCL0_LOW register
1342 static int f1x_early_channel_count(struct amd64_pvt
*pvt
)
1344 int i
, j
, channels
= 0;
1346 /* On F10h, if we are in 128 bit mode, then we are using 2 channels */
1347 if (pvt
->fam
== 0x10 && (pvt
->dclr0
& WIDTH_128
))
1351 * Need to check if in unganged mode: In such, there are 2 channels,
1352 * but they are not in 128 bit mode and thus the above 'dclr0' status
1355 * Need to check DCT0[0] and DCT1[0] to see if only one of them has
1356 * their CSEnable bit on. If so, then SINGLE DIMM case.
1358 edac_dbg(0, "Data width is not 128 bits - need more decoding\n");
1361 * Check DRAM Bank Address Mapping values for each DIMM to see if there
1362 * is more than just one DIMM present in unganged mode. Need to check
1363 * both controllers since DIMMs can be placed in either one.
1365 for (i
= 0; i
< 2; i
++) {
1366 u32 dbam
= (i
? pvt
->dbam1
: pvt
->dbam0
);
1368 for (j
= 0; j
< 4; j
++) {
1369 if (DBAM_DIMM(j
, dbam
) > 0) {
1379 amd64_info("MCT channel count: %d\n", channels
);
1384 static int f17_early_channel_count(struct amd64_pvt
*pvt
)
1386 int i
, channels
= 0;
1388 /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
1389 for (i
= 0; i
< NUM_UMCS
; i
++)
1390 channels
+= !!(pvt
->umc
[i
].sdp_ctrl
& UMC_SDP_INIT
);
1392 amd64_info("MCT channel count: %d\n", channels
);
1397 static int ddr3_cs_size(unsigned i
, bool dct_width
)
1402 if (i
== 0 || i
== 3 || i
== 4)
1408 else if (!(i
& 0x1))
1411 shift
= (i
+ 1) >> 1;
1414 cs_size
= (128 * (1 << !!dct_width
)) << shift
;
1419 static int ddr3_lrdimm_cs_size(unsigned i
, unsigned rank_multiply
)
1424 if (i
< 4 || i
== 6)
1428 else if (!(i
& 0x1))
1431 shift
= (i
+ 1) >> 1;
1434 cs_size
= rank_multiply
* (128 << shift
);
1439 static int ddr4_cs_size(unsigned i
)
1448 /* Min cs_size = 1G */
1449 cs_size
= 1024 * (1 << (i
>> 1));
1454 static int f10_dbam_to_chip_select(struct amd64_pvt
*pvt
, u8 dct
,
1455 unsigned cs_mode
, int cs_mask_nr
)
1457 u32 dclr
= dct
? pvt
->dclr1
: pvt
->dclr0
;
1459 WARN_ON(cs_mode
> 11);
1461 if (pvt
->dchr0
& DDR3_MODE
|| pvt
->dchr1
& DDR3_MODE
)
1462 return ddr3_cs_size(cs_mode
, dclr
& WIDTH_128
);
1464 return ddr2_cs_size(cs_mode
, dclr
& WIDTH_128
);
1468 * F15h supports only 64bit DCT interfaces
1470 static int f15_dbam_to_chip_select(struct amd64_pvt
*pvt
, u8 dct
,
1471 unsigned cs_mode
, int cs_mask_nr
)
1473 WARN_ON(cs_mode
> 12);
1475 return ddr3_cs_size(cs_mode
, false);
1478 /* F15h M60h supports DDR4 mapping as well.. */
1479 static int f15_m60h_dbam_to_chip_select(struct amd64_pvt
*pvt
, u8 dct
,
1480 unsigned cs_mode
, int cs_mask_nr
)
1483 u32 dcsm
= pvt
->csels
[dct
].csmasks
[cs_mask_nr
];
1485 WARN_ON(cs_mode
> 12);
1487 if (pvt
->dram_type
== MEM_DDR4
) {
1491 cs_size
= ddr4_cs_size(cs_mode
);
1492 } else if (pvt
->dram_type
== MEM_LRDDR3
) {
1493 unsigned rank_multiply
= dcsm
& 0xf;
1495 if (rank_multiply
== 3)
1497 cs_size
= ddr3_lrdimm_cs_size(cs_mode
, rank_multiply
);
1499 /* Minimum cs size is 512mb for F15hM60h*/
1503 cs_size
= ddr3_cs_size(cs_mode
, false);
1510 * F16h and F15h model 30h have only limited cs_modes.
1512 static int f16_dbam_to_chip_select(struct amd64_pvt
*pvt
, u8 dct
,
1513 unsigned cs_mode
, int cs_mask_nr
)
1515 WARN_ON(cs_mode
> 12);
1517 if (cs_mode
== 6 || cs_mode
== 8 ||
1518 cs_mode
== 9 || cs_mode
== 12)
1521 return ddr3_cs_size(cs_mode
, false);
1524 static int f17_base_addr_to_cs_size(struct amd64_pvt
*pvt
, u8 umc
,
1525 unsigned int cs_mode
, int csrow_nr
)
1527 u32 base_addr
= pvt
->csels
[umc
].csbases
[csrow_nr
];
1529 /* Each mask is used for every two base addresses. */
1530 u32 addr_mask
= pvt
->csels
[umc
].csmasks
[csrow_nr
>> 1];
1532 /* Register [31:1] = Address [39:9]. Size is in kBs here. */
1533 u32 size
= ((addr_mask
>> 1) - (base_addr
>> 1) + 1) >> 1;
1535 edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr
, addr_mask
);
1537 /* Return size in MBs. */
1541 static void read_dram_ctl_register(struct amd64_pvt
*pvt
)
1544 if (pvt
->fam
== 0xf)
1547 if (!amd64_read_pci_cfg(pvt
->F2
, DCT_SEL_LO
, &pvt
->dct_sel_lo
)) {
1548 edac_dbg(0, "F2x110 (DCTSelLow): 0x%08x, High range addrs at: 0x%x\n",
1549 pvt
->dct_sel_lo
, dct_sel_baseaddr(pvt
));
1551 edac_dbg(0, " DCTs operate in %s mode\n",
1552 (dct_ganging_enabled(pvt
) ? "ganged" : "unganged"));
1554 if (!dct_ganging_enabled(pvt
))
1555 edac_dbg(0, " Address range split per DCT: %s\n",
1556 (dct_high_range_enabled(pvt
) ? "yes" : "no"));
1558 edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
1559 (dct_data_intlv_enabled(pvt
) ? "enabled" : "disabled"),
1560 (dct_memory_cleared(pvt
) ? "yes" : "no"));
1562 edac_dbg(0, " channel interleave: %s, "
1563 "interleave bits selector: 0x%x\n",
1564 (dct_interleave_enabled(pvt
) ? "enabled" : "disabled"),
1565 dct_sel_interleave_addr(pvt
));
1568 amd64_read_pci_cfg(pvt
->F2
, DCT_SEL_HI
, &pvt
->dct_sel_hi
);
1572 * Determine channel (DCT) based on the interleaving mode (see F15h M30h BKDG,
1573 * 2.10.12 Memory Interleaving Modes).
1575 static u8
f15_m30h_determine_channel(struct amd64_pvt
*pvt
, u64 sys_addr
,
1576 u8 intlv_en
, int num_dcts_intlv
,
1583 return (u8
)(dct_sel
);
1585 if (num_dcts_intlv
== 2) {
1586 select
= (sys_addr
>> 8) & 0x3;
1587 channel
= select
? 0x3 : 0;
1588 } else if (num_dcts_intlv
== 4) {
1589 u8 intlv_addr
= dct_sel_interleave_addr(pvt
);
1590 switch (intlv_addr
) {
1592 channel
= (sys_addr
>> 8) & 0x3;
1595 channel
= (sys_addr
>> 9) & 0x3;
1603 * Determine channel (DCT) based on the interleaving mode: F10h BKDG, 2.8.9 Memory
1604 * Interleaving Modes.
1606 static u8
f1x_determine_channel(struct amd64_pvt
*pvt
, u64 sys_addr
,
1607 bool hi_range_sel
, u8 intlv_en
)
1609 u8 dct_sel_high
= (pvt
->dct_sel_lo
>> 1) & 1;
1611 if (dct_ganging_enabled(pvt
))
1615 return dct_sel_high
;
1618 * see F2x110[DctSelIntLvAddr] - channel interleave mode
1620 if (dct_interleave_enabled(pvt
)) {
1621 u8 intlv_addr
= dct_sel_interleave_addr(pvt
);
1623 /* return DCT select function: 0=DCT0, 1=DCT1 */
1625 return sys_addr
>> 6 & 1;
1627 if (intlv_addr
& 0x2) {
1628 u8 shift
= intlv_addr
& 0x1 ? 9 : 6;
1629 u32 temp
= hweight_long((u32
) ((sys_addr
>> 16) & 0x1F)) & 1;
1631 return ((sys_addr
>> shift
) & 1) ^ temp
;
1634 if (intlv_addr
& 0x4) {
1635 u8 shift
= intlv_addr
& 0x1 ? 9 : 8;
1637 return (sys_addr
>> shift
) & 1;
1640 return (sys_addr
>> (12 + hweight8(intlv_en
))) & 1;
1643 if (dct_high_range_enabled(pvt
))
1644 return ~dct_sel_high
& 1;
1649 /* Convert the sys_addr to the normalized DCT address */
1650 static u64
f1x_get_norm_dct_addr(struct amd64_pvt
*pvt
, u8 range
,
1651 u64 sys_addr
, bool hi_rng
,
1652 u32 dct_sel_base_addr
)
1655 u64 dram_base
= get_dram_base(pvt
, range
);
1656 u64 hole_off
= f10_dhar_offset(pvt
);
1657 u64 dct_sel_base_off
= (u64
)(pvt
->dct_sel_hi
& 0xFFFFFC00) << 16;
1662 * base address of high range is below 4Gb
1663 * (bits [47:27] at [31:11])
1664 * DRAM address space on this DCT is hoisted above 4Gb &&
1667 * remove hole offset from sys_addr
1669 * remove high range offset from sys_addr
1671 if ((!(dct_sel_base_addr
>> 16) ||
1672 dct_sel_base_addr
< dhar_base(pvt
)) &&
1674 (sys_addr
>= BIT_64(32)))
1675 chan_off
= hole_off
;
1677 chan_off
= dct_sel_base_off
;
1681 * we have a valid hole &&
1686 * remove dram base to normalize to DCT address
1688 if (dhar_valid(pvt
) && (sys_addr
>= BIT_64(32)))
1689 chan_off
= hole_off
;
1691 chan_off
= dram_base
;
1694 return (sys_addr
& GENMASK_ULL(47,6)) - (chan_off
& GENMASK_ULL(47,23));
1698 * checks if the csrow passed in is marked as SPARED, if so returns the new
1701 static int f10_process_possible_spare(struct amd64_pvt
*pvt
, u8 dct
, int csrow
)
1705 if (online_spare_swap_done(pvt
, dct
) &&
1706 csrow
== online_spare_bad_dramcs(pvt
, dct
)) {
1708 for_each_chip_select(tmp_cs
, dct
, pvt
) {
1709 if (chip_select_base(tmp_cs
, dct
, pvt
) & 0x2) {
1719 * Iterate over the DRAM DCT "base" and "mask" registers looking for a
1720 * SystemAddr match on the specified 'ChannelSelect' and 'NodeID'
1723 * -EINVAL: NOT FOUND
1724 * 0..csrow = Chip-Select Row
1726 static int f1x_lookup_addr_in_dct(u64 in_addr
, u8 nid
, u8 dct
)
1728 struct mem_ctl_info
*mci
;
1729 struct amd64_pvt
*pvt
;
1730 u64 cs_base
, cs_mask
;
1731 int cs_found
= -EINVAL
;
1734 mci
= edac_mc_find(nid
);
1738 pvt
= mci
->pvt_info
;
1740 edac_dbg(1, "input addr: 0x%llx, DCT: %d\n", in_addr
, dct
);
1742 for_each_chip_select(csrow
, dct
, pvt
) {
1743 if (!csrow_enabled(csrow
, dct
, pvt
))
1746 get_cs_base_and_mask(pvt
, csrow
, dct
, &cs_base
, &cs_mask
);
1748 edac_dbg(1, " CSROW=%d CSBase=0x%llx CSMask=0x%llx\n",
1749 csrow
, cs_base
, cs_mask
);
1753 edac_dbg(1, " (InputAddr & ~CSMask)=0x%llx (CSBase & ~CSMask)=0x%llx\n",
1754 (in_addr
& cs_mask
), (cs_base
& cs_mask
));
1756 if ((in_addr
& cs_mask
) == (cs_base
& cs_mask
)) {
1757 if (pvt
->fam
== 0x15 && pvt
->model
>= 0x30) {
1761 cs_found
= f10_process_possible_spare(pvt
, dct
, csrow
);
1763 edac_dbg(1, " MATCH csrow=%d\n", cs_found
);
1771 * See F2x10C. Non-interleaved graphics framebuffer memory under the 16G is
1772 * swapped with a region located at the bottom of memory so that the GPU can use
1773 * the interleaved region and thus two channels.
1775 static u64
f1x_swap_interleaved_region(struct amd64_pvt
*pvt
, u64 sys_addr
)
1777 u32 swap_reg
, swap_base
, swap_limit
, rgn_size
, tmp_addr
;
1779 if (pvt
->fam
== 0x10) {
1780 /* only revC3 and revE have that feature */
1781 if (pvt
->model
< 4 || (pvt
->model
< 0xa && pvt
->stepping
< 3))
1785 amd64_read_pci_cfg(pvt
->F2
, SWAP_INTLV_REG
, &swap_reg
);
1787 if (!(swap_reg
& 0x1))
1790 swap_base
= (swap_reg
>> 3) & 0x7f;
1791 swap_limit
= (swap_reg
>> 11) & 0x7f;
1792 rgn_size
= (swap_reg
>> 20) & 0x7f;
1793 tmp_addr
= sys_addr
>> 27;
1795 if (!(sys_addr
>> 34) &&
1796 (((tmp_addr
>= swap_base
) &&
1797 (tmp_addr
<= swap_limit
)) ||
1798 (tmp_addr
< rgn_size
)))
1799 return sys_addr
^ (u64
)swap_base
<< 27;
1804 /* For a given @dram_range, check if @sys_addr falls within it. */
1805 static int f1x_match_to_this_node(struct amd64_pvt
*pvt
, unsigned range
,
1806 u64 sys_addr
, int *chan_sel
)
1808 int cs_found
= -EINVAL
;
1812 bool high_range
= false;
1814 u8 node_id
= dram_dst_node(pvt
, range
);
1815 u8 intlv_en
= dram_intlv_en(pvt
, range
);
1816 u32 intlv_sel
= dram_intlv_sel(pvt
, range
);
1818 edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1819 range
, sys_addr
, get_dram_limit(pvt
, range
));
1821 if (dhar_valid(pvt
) &&
1822 dhar_base(pvt
) <= sys_addr
&&
1823 sys_addr
< BIT_64(32)) {
1824 amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1829 if (intlv_en
&& (intlv_sel
!= ((sys_addr
>> 12) & intlv_en
)))
1832 sys_addr
= f1x_swap_interleaved_region(pvt
, sys_addr
);
1834 dct_sel_base
= dct_sel_baseaddr(pvt
);
1837 * check whether addresses >= DctSelBaseAddr[47:27] are to be used to
1838 * select between DCT0 and DCT1.
1840 if (dct_high_range_enabled(pvt
) &&
1841 !dct_ganging_enabled(pvt
) &&
1842 ((sys_addr
>> 27) >= (dct_sel_base
>> 11)))
1845 channel
= f1x_determine_channel(pvt
, sys_addr
, high_range
, intlv_en
);
1847 chan_addr
= f1x_get_norm_dct_addr(pvt
, range
, sys_addr
,
1848 high_range
, dct_sel_base
);
1850 /* Remove node interleaving, see F1x120 */
1852 chan_addr
= ((chan_addr
>> (12 + hweight8(intlv_en
))) << 12) |
1853 (chan_addr
& 0xfff);
1855 /* remove channel interleave */
1856 if (dct_interleave_enabled(pvt
) &&
1857 !dct_high_range_enabled(pvt
) &&
1858 !dct_ganging_enabled(pvt
)) {
1860 if (dct_sel_interleave_addr(pvt
) != 1) {
1861 if (dct_sel_interleave_addr(pvt
) == 0x3)
1863 chan_addr
= ((chan_addr
>> 10) << 9) |
1864 (chan_addr
& 0x1ff);
1866 /* A[6] or hash 6 */
1867 chan_addr
= ((chan_addr
>> 7) << 6) |
1871 chan_addr
= ((chan_addr
>> 13) << 12) |
1872 (chan_addr
& 0xfff);
1875 edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr
);
1877 cs_found
= f1x_lookup_addr_in_dct(chan_addr
, node_id
, channel
);
1880 *chan_sel
= channel
;
1885 static int f15_m30h_match_to_this_node(struct amd64_pvt
*pvt
, unsigned range
,
1886 u64 sys_addr
, int *chan_sel
)
1888 int cs_found
= -EINVAL
;
1889 int num_dcts_intlv
= 0;
1890 u64 chan_addr
, chan_offset
;
1891 u64 dct_base
, dct_limit
;
1892 u32 dct_cont_base_reg
, dct_cont_limit_reg
, tmp
;
1893 u8 channel
, alias_channel
, leg_mmio_hole
, dct_sel
, dct_offset_en
;
1895 u64 dhar_offset
= f10_dhar_offset(pvt
);
1896 u8 intlv_addr
= dct_sel_interleave_addr(pvt
);
1897 u8 node_id
= dram_dst_node(pvt
, range
);
1898 u8 intlv_en
= dram_intlv_en(pvt
, range
);
1900 amd64_read_pci_cfg(pvt
->F1
, DRAM_CONT_BASE
, &dct_cont_base_reg
);
1901 amd64_read_pci_cfg(pvt
->F1
, DRAM_CONT_LIMIT
, &dct_cont_limit_reg
);
1903 dct_offset_en
= (u8
) ((dct_cont_base_reg
>> 3) & BIT(0));
1904 dct_sel
= (u8
) ((dct_cont_base_reg
>> 4) & 0x7);
1906 edac_dbg(1, "(range %d) SystemAddr= 0x%llx Limit=0x%llx\n",
1907 range
, sys_addr
, get_dram_limit(pvt
, range
));
1909 if (!(get_dram_base(pvt
, range
) <= sys_addr
) &&
1910 !(get_dram_limit(pvt
, range
) >= sys_addr
))
1913 if (dhar_valid(pvt
) &&
1914 dhar_base(pvt
) <= sys_addr
&&
1915 sys_addr
< BIT_64(32)) {
1916 amd64_warn("Huh? Address is in the MMIO hole: 0x%016llx\n",
1921 /* Verify sys_addr is within DCT Range. */
1922 dct_base
= (u64
) dct_sel_baseaddr(pvt
);
1923 dct_limit
= (dct_cont_limit_reg
>> 11) & 0x1FFF;
1925 if (!(dct_cont_base_reg
& BIT(0)) &&
1926 !(dct_base
<= (sys_addr
>> 27) &&
1927 dct_limit
>= (sys_addr
>> 27)))
1930 /* Verify number of dct's that participate in channel interleaving. */
1931 num_dcts_intlv
= (int) hweight8(intlv_en
);
1933 if (!(num_dcts_intlv
% 2 == 0) || (num_dcts_intlv
> 4))
1936 if (pvt
->model
>= 0x60)
1937 channel
= f1x_determine_channel(pvt
, sys_addr
, false, intlv_en
);
1939 channel
= f15_m30h_determine_channel(pvt
, sys_addr
, intlv_en
,
1940 num_dcts_intlv
, dct_sel
);
1942 /* Verify we stay within the MAX number of channels allowed */
1946 leg_mmio_hole
= (u8
) (dct_cont_base_reg
>> 1 & BIT(0));
1948 /* Get normalized DCT addr */
1949 if (leg_mmio_hole
&& (sys_addr
>= BIT_64(32)))
1950 chan_offset
= dhar_offset
;
1952 chan_offset
= dct_base
<< 27;
1954 chan_addr
= sys_addr
- chan_offset
;
1956 /* remove channel interleave */
1957 if (num_dcts_intlv
== 2) {
1958 if (intlv_addr
== 0x4)
1959 chan_addr
= ((chan_addr
>> 9) << 8) |
1961 else if (intlv_addr
== 0x5)
1962 chan_addr
= ((chan_addr
>> 10) << 9) |
1963 (chan_addr
& 0x1ff);
1967 } else if (num_dcts_intlv
== 4) {
1968 if (intlv_addr
== 0x4)
1969 chan_addr
= ((chan_addr
>> 10) << 8) |
1971 else if (intlv_addr
== 0x5)
1972 chan_addr
= ((chan_addr
>> 11) << 9) |
1973 (chan_addr
& 0x1ff);
1978 if (dct_offset_en
) {
1979 amd64_read_pci_cfg(pvt
->F1
,
1980 DRAM_CONT_HIGH_OFF
+ (int) channel
* 4,
1982 chan_addr
+= (u64
) ((tmp
>> 11) & 0xfff) << 27;
1985 f15h_select_dct(pvt
, channel
);
1987 edac_dbg(1, " Normalized DCT addr: 0x%llx\n", chan_addr
);
1991 * if channel = 3, then alias it to 1. This is because, in F15 M30h,
1992 * there is support for 4 DCT's, but only 2 are currently functional.
1993 * They are DCT0 and DCT3. But we have read all registers of DCT3 into
1994 * pvt->csels[1]. So we need to use '1' here to get correct info.
1995 * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications.
1997 alias_channel
= (channel
== 3) ? 1 : channel
;
1999 cs_found
= f1x_lookup_addr_in_dct(chan_addr
, node_id
, alias_channel
);
2002 *chan_sel
= alias_channel
;
2007 static int f1x_translate_sysaddr_to_cs(struct amd64_pvt
*pvt
,
2011 int cs_found
= -EINVAL
;
2014 for (range
= 0; range
< DRAM_RANGES
; range
++) {
2015 if (!dram_rw(pvt
, range
))
2018 if (pvt
->fam
== 0x15 && pvt
->model
>= 0x30)
2019 cs_found
= f15_m30h_match_to_this_node(pvt
, range
,
2023 else if ((get_dram_base(pvt
, range
) <= sys_addr
) &&
2024 (get_dram_limit(pvt
, range
) >= sys_addr
)) {
2025 cs_found
= f1x_match_to_this_node(pvt
, range
,
2026 sys_addr
, chan_sel
);
2035 * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps
2036 * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW).
2038 * The @sys_addr is usually an error address received from the hardware
2041 static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info
*mci
, u64 sys_addr
,
2042 struct err_info
*err
)
2044 struct amd64_pvt
*pvt
= mci
->pvt_info
;
2046 error_address_to_page_and_offset(sys_addr
, err
);
2048 err
->csrow
= f1x_translate_sysaddr_to_cs(pvt
, sys_addr
, &err
->channel
);
2049 if (err
->csrow
< 0) {
2050 err
->err_code
= ERR_CSROW
;
2055 * We need the syndromes for channel detection only when we're
2056 * ganged. Otherwise @chan should already contain the channel at
2059 if (dct_ganging_enabled(pvt
))
2060 err
->channel
= get_channel_from_ecc_syndrome(mci
, err
->syndrome
);
2064 * debug routine to display the memory sizes of all logical DIMMs and its
2067 static void debug_display_dimm_sizes(struct amd64_pvt
*pvt
, u8 ctrl
)
2069 int dimm
, size0
, size1
;
2070 u32
*dcsb
= ctrl
? pvt
->csels
[1].csbases
: pvt
->csels
[0].csbases
;
2071 u32 dbam
= ctrl
? pvt
->dbam1
: pvt
->dbam0
;
2073 if (pvt
->fam
== 0xf) {
2074 /* K8 families < revF not supported yet */
2075 if (pvt
->ext_model
< K8_REV_F
)
2081 if (pvt
->fam
== 0x10) {
2082 dbam
= (ctrl
&& !dct_ganging_enabled(pvt
)) ? pvt
->dbam1
2084 dcsb
= (ctrl
&& !dct_ganging_enabled(pvt
)) ?
2085 pvt
->csels
[1].csbases
:
2086 pvt
->csels
[0].csbases
;
2089 dcsb
= pvt
->csels
[1].csbases
;
2091 edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n",
2094 edac_printk(KERN_DEBUG
, EDAC_MC
, "DCT%d chip selects:\n", ctrl
);
2096 /* Dump memory sizes for DIMM and its CSROWs */
2097 for (dimm
= 0; dimm
< 4; dimm
++) {
2100 if (dcsb
[dimm
*2] & DCSB_CS_ENABLE
)
2102 * For F15m60h, we need multiplier for LRDIMM cs_size
2103 * calculation. We pass dimm value to the dbam_to_cs
2104 * mapper so we can find the multiplier from the
2105 * corresponding DCSM.
2107 size0
= pvt
->ops
->dbam_to_cs(pvt
, ctrl
,
2108 DBAM_DIMM(dimm
, dbam
),
2112 if (dcsb
[dimm
*2 + 1] & DCSB_CS_ENABLE
)
2113 size1
= pvt
->ops
->dbam_to_cs(pvt
, ctrl
,
2114 DBAM_DIMM(dimm
, dbam
),
2117 amd64_info(EDAC_MC
": %d: %5dMB %d: %5dMB\n",
2119 dimm
* 2 + 1, size1
);
2123 static struct amd64_family_type family_types
[] = {
2126 .f1_id
= PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP
,
2127 .f2_id
= PCI_DEVICE_ID_AMD_K8_NB_MEMCTL
,
2129 .early_channel_count
= k8_early_channel_count
,
2130 .map_sysaddr_to_csrow
= k8_map_sysaddr_to_csrow
,
2131 .dbam_to_cs
= k8_dbam_to_chip_select
,
2136 .f1_id
= PCI_DEVICE_ID_AMD_10H_NB_MAP
,
2137 .f2_id
= PCI_DEVICE_ID_AMD_10H_NB_DRAM
,
2139 .early_channel_count
= f1x_early_channel_count
,
2140 .map_sysaddr_to_csrow
= f1x_map_sysaddr_to_csrow
,
2141 .dbam_to_cs
= f10_dbam_to_chip_select
,
2146 .f1_id
= PCI_DEVICE_ID_AMD_15H_NB_F1
,
2147 .f2_id
= PCI_DEVICE_ID_AMD_15H_NB_F2
,
2149 .early_channel_count
= f1x_early_channel_count
,
2150 .map_sysaddr_to_csrow
= f1x_map_sysaddr_to_csrow
,
2151 .dbam_to_cs
= f15_dbam_to_chip_select
,
2155 .ctl_name
= "F15h_M30h",
2156 .f1_id
= PCI_DEVICE_ID_AMD_15H_M30H_NB_F1
,
2157 .f2_id
= PCI_DEVICE_ID_AMD_15H_M30H_NB_F2
,
2159 .early_channel_count
= f1x_early_channel_count
,
2160 .map_sysaddr_to_csrow
= f1x_map_sysaddr_to_csrow
,
2161 .dbam_to_cs
= f16_dbam_to_chip_select
,
2165 .ctl_name
= "F15h_M60h",
2166 .f1_id
= PCI_DEVICE_ID_AMD_15H_M60H_NB_F1
,
2167 .f2_id
= PCI_DEVICE_ID_AMD_15H_M60H_NB_F2
,
2169 .early_channel_count
= f1x_early_channel_count
,
2170 .map_sysaddr_to_csrow
= f1x_map_sysaddr_to_csrow
,
2171 .dbam_to_cs
= f15_m60h_dbam_to_chip_select
,
2176 .f1_id
= PCI_DEVICE_ID_AMD_16H_NB_F1
,
2177 .f2_id
= PCI_DEVICE_ID_AMD_16H_NB_F2
,
2179 .early_channel_count
= f1x_early_channel_count
,
2180 .map_sysaddr_to_csrow
= f1x_map_sysaddr_to_csrow
,
2181 .dbam_to_cs
= f16_dbam_to_chip_select
,
2185 .ctl_name
= "F16h_M30h",
2186 .f1_id
= PCI_DEVICE_ID_AMD_16H_M30H_NB_F1
,
2187 .f2_id
= PCI_DEVICE_ID_AMD_16H_M30H_NB_F2
,
2189 .early_channel_count
= f1x_early_channel_count
,
2190 .map_sysaddr_to_csrow
= f1x_map_sysaddr_to_csrow
,
2191 .dbam_to_cs
= f16_dbam_to_chip_select
,
2196 .f0_id
= PCI_DEVICE_ID_AMD_17H_DF_F0
,
2197 .f6_id
= PCI_DEVICE_ID_AMD_17H_DF_F6
,
2199 .early_channel_count
= f17_early_channel_count
,
2200 .dbam_to_cs
= f17_base_addr_to_cs_size
,
2204 .ctl_name
= "F17h_M10h",
2205 .f0_id
= PCI_DEVICE_ID_AMD_17H_M10H_DF_F0
,
2206 .f6_id
= PCI_DEVICE_ID_AMD_17H_M10H_DF_F6
,
2208 .early_channel_count
= f17_early_channel_count
,
2209 .dbam_to_cs
= f17_base_addr_to_cs_size
,
2215 * These are tables of eigenvectors (one per line) which can be used for the
2216 * construction of the syndrome tables. The modified syndrome search algorithm
2217 * uses those to find the symbol in error and thus the DIMM.
2219 * Algorithm courtesy of Ross LaFetra from AMD.
2221 static const u16 x4_vectors
[] = {
2222 0x2f57, 0x1afe, 0x66cc, 0xdd88,
2223 0x11eb, 0x3396, 0x7f4c, 0xeac8,
2224 0x0001, 0x0002, 0x0004, 0x0008,
2225 0x1013, 0x3032, 0x4044, 0x8088,
2226 0x106b, 0x30d6, 0x70fc, 0xe0a8,
2227 0x4857, 0xc4fe, 0x13cc, 0x3288,
2228 0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
2229 0x1f39, 0x251e, 0xbd6c, 0x6bd8,
2230 0x15c1, 0x2a42, 0x89ac, 0x4758,
2231 0x2b03, 0x1602, 0x4f0c, 0xca08,
2232 0x1f07, 0x3a0e, 0x6b04, 0xbd08,
2233 0x8ba7, 0x465e, 0x244c, 0x1cc8,
2234 0x2b87, 0x164e, 0x642c, 0xdc18,
2235 0x40b9, 0x80de, 0x1094, 0x20e8,
2236 0x27db, 0x1eb6, 0x9dac, 0x7b58,
2237 0x11c1, 0x2242, 0x84ac, 0x4c58,
2238 0x1be5, 0x2d7a, 0x5e34, 0xa718,
2239 0x4b39, 0x8d1e, 0x14b4, 0x28d8,
2240 0x4c97, 0xc87e, 0x11fc, 0x33a8,
2241 0x8e97, 0x497e, 0x2ffc, 0x1aa8,
2242 0x16b3, 0x3d62, 0x4f34, 0x8518,
2243 0x1e2f, 0x391a, 0x5cac, 0xf858,
2244 0x1d9f, 0x3b7a, 0x572c, 0xfe18,
2245 0x15f5, 0x2a5a, 0x5264, 0xa3b8,
2246 0x1dbb, 0x3b66, 0x715c, 0xe3f8,
2247 0x4397, 0xc27e, 0x17fc, 0x3ea8,
2248 0x1617, 0x3d3e, 0x6464, 0xb8b8,
2249 0x23ff, 0x12aa, 0xab6c, 0x56d8,
2250 0x2dfb, 0x1ba6, 0x913c, 0x7328,
2251 0x185d, 0x2ca6, 0x7914, 0x9e28,
2252 0x171b, 0x3e36, 0x7d7c, 0xebe8,
2253 0x4199, 0x82ee, 0x19f4, 0x2e58,
2254 0x4807, 0xc40e, 0x130c, 0x3208,
2255 0x1905, 0x2e0a, 0x5804, 0xac08,
2256 0x213f, 0x132a, 0xadfc, 0x5ba8,
2257 0x19a9, 0x2efe, 0xb5cc, 0x6f88,
static const u16 x8_vectors[] = {
	0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
	0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
	0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
	0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80,
	0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780,
	0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080,
	0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080,
	0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080,
	0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80,
	0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580,
	0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880,
	0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280,
	0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180,
	0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580,
	0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280,
	0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180,
	0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080,
	0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
	0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
};
static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
			   unsigned v_dim)
{
	unsigned int i, err_sym;

	for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) {
		unsigned v_idx = err_sym * v_dim;
		unsigned v_end = (err_sym + 1) * v_dim;

		/* walk over all 16 bits of the syndrome */
		for (i = 1; i < (1U << 16); i <<= 1) {

			/* if bit is set in that eigenvector... */
			if (v_idx < v_end && vectors[v_idx] & i) {
				u16 ev_comp = vectors[v_idx++];

				/* ... and bit set in the modified syndrome, */
				if (syndrome & i) {
					/* remove it. */
					syndrome ^= ev_comp;

					if (!syndrome)
						return err_sym;
				}

			} else if (syndrome & i)
				/* can't get to zero, move to next symbol */
				break;
		}
	}

	edac_dbg(0, "syndrome(%x) not found\n", syndrome);
	return -1;
}
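
/*
 * Dimensioning note (derived from the tables above): x4_vectors has 36 rows
 * of 4 entries, so decode_syndrome() is called with num_vecs = 144 and
 * v_dim = 4, letting err_sym range over 0..35; x8_vectors has 19 rows of 8
 * entries (num_vecs = 152, v_dim = 8), giving err_sym 0..18.
 */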
static int map_err_sym_to_channel(int err_sym, int sym_size)
{
	if (sym_size == 4)
		switch (err_sym) {
		case 0x20:
		case 0x21:
			return 0;
		case 0x22:
		case 0x23:
			return 1;
		default:
			return err_sym >> 4;
		}
	/* x8 symbols */
	else
		switch (err_sym) {
		/* imaginary bits not in a DIMM */
		case 0x10:
			WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n",
			     err_sym);
			return -1;
		case 0x11:
			return 0;
		case 0x12:
			return 1;
		default:
			return err_sym >> 3;
		}
}
static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	int err_sym = -1;

	if (pvt->ecc_sym_sz == 8)
		err_sym = decode_syndrome(syndrome, x8_vectors,
					  ARRAY_SIZE(x8_vectors),
					  pvt->ecc_sym_sz);
	else if (pvt->ecc_sym_sz == 4)
		err_sym = decode_syndrome(syndrome, x4_vectors,
					  ARRAY_SIZE(x4_vectors),
					  pvt->ecc_sym_sz);
	else {
		amd64_warn("Illegal syndrome type: %u\n", pvt->ecc_sym_sz);
		return err_sym;
	}

	return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
}
static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
			    u8 ecc_type)
{
	enum hw_event_mc_err_type err_type;
	const char *string;

	if (ecc_type == 2)
		err_type = HW_EVENT_ERR_CORRECTED;
	else if (ecc_type == 1)
		err_type = HW_EVENT_ERR_UNCORRECTED;
	else if (ecc_type == 3)
		err_type = HW_EVENT_ERR_DEFERRED;
	else {
		WARN(1, "Something is rotten in the state of Denmark.\n");
		return;
	}

	switch (err->err_code) {
	case DECODE_OK:
		string = "";
		break;
	case ERR_NODE:
		string = "Failed to map error addr to a node";
		break;
	case ERR_CSROW:
		string = "Failed to map error addr to a csrow";
		break;
	case ERR_CHANNEL:
		string = "Unknown syndrome - possible error reporting race";
		break;
	case ERR_SYND:
		string = "MCA_SYND not valid - unknown syndrome and csrow";
		break;
	case ERR_NORM_ADDR:
		string = "Cannot decode normalized address";
		break;
	default:
		string = "WTF error";
		break;
	}

	edac_mc_handle_error(err_type, mci, 1,
			     err->page, err->offset, err->syndrome,
			     err->csrow, err->channel, -1,
			     string, "");
}
static inline void decode_bus_error(int node_id, struct mce *m)
{
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;
	u8 ecc_type = (m->status >> 45) & 0x3;
	u8 xec = XEC(m->status, 0x1f);
	u16 ec = EC(m->status);
	u64 sys_addr;
	struct err_info err;

	mci = edac_mc_find(node_id);
	if (!mci)
		return;

	pvt = mci->pvt_info;

	/* Bail out early if this was an 'observed' error */
	if (PP(ec) == NBSL_PP_OBS)
		return;

	/* Do only ECC errors */
	if (xec && xec != F10_NBSL_EXT_ERR_ECC)
		return;

	memset(&err, 0, sizeof(err));

	sys_addr = get_error_address(pvt, m);

	if (ecc_type == 2)
		err.syndrome = extract_syndrome(m->status);

	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);

	__log_ecc_error(mci, &err, ecc_type);
}
/*
 * To find the UMC channel represented by this bank we need to match on its
 * instance_id. The instance_id of a bank is held in the lower 32 bits of its
 * IPID.
 */
static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
{
	u32 umc_instance_id[] = {0x50f00, 0x150f00};
	u32 instance_id = m->ipid & GENMASK(31, 0);
	int i, channel = -1;

	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
		if (umc_instance_id[i] == instance_id)
			channel = i;

	return channel;
}
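
/*
 * Example of the mapping above: a bank whose MCA_IPID low 32 bits read
 * 0x50f00 is UMC channel 0 and 0x150f00 is UMC channel 1; anything else
 * leaves the channel at -1 and the caller logs the error with ERR_CHANNEL.
 */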
static void decode_umc_error(int node_id, struct mce *m)
{
	u8 ecc_type = (m->status >> 45) & 0x3;
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;
	struct err_info err;
	u64 sys_addr;

	mci = edac_mc_find(node_id);
	if (!mci)
		return;

	pvt = mci->pvt_info;

	memset(&err, 0, sizeof(err));

	if (m->status & MCI_STATUS_DEFERRED)
		ecc_type = 3;

	err.channel = find_umc_channel(pvt, m);
	if (err.channel < 0) {
		err.err_code = ERR_CHANNEL;
		goto log_error;
	}

	if (!(m->status & MCI_STATUS_SYNDV)) {
		err.err_code = ERR_SYND;
		goto log_error;
	}

	if (ecc_type == 2) {
		u8 length = (m->synd >> 18) & 0x3f;

		if (length)
			err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
		else
			err.err_code = ERR_CHANNEL;
	}

	err.csrow = m->synd & 0x7;

	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
		err.err_code = ERR_NORM_ADDR;
		goto log_error;
	}

	error_address_to_page_and_offset(sys_addr, &err);

log_error:
	__log_ecc_error(mci, &err, ecc_type);
}
/*
 * Use pvt->F3 which contains the F3 CPU PCI device to get the related
 * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
 * Reserve F0 and F6 on systems with a UMC.
 */
static int
reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2)
{
	if (pvt->umc) {
		pvt->F0 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
		if (!pvt->F0) {
			amd64_err("F0 not found, device 0x%x (broken BIOS?)\n", pci_id1);
			return -ENODEV;
		}

		pvt->F6 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
		if (!pvt->F6) {
			pci_dev_put(pvt->F0);
			pvt->F0 = NULL;

			amd64_err("F6 not found: device 0x%x (broken BIOS?)\n", pci_id2);
			return -ENODEV;
		}

		edac_dbg(1, "F0: %s\n", pci_name(pvt->F0));
		edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));
		edac_dbg(1, "F6: %s\n", pci_name(pvt->F6));

		return 0;
	}

	/* Reserve the ADDRESS MAP Device */
	pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3);
	if (!pvt->F1) {
		amd64_err("F1 not found: device 0x%x (broken BIOS?)\n", pci_id1);
		return -ENODEV;
	}

	/* Reserve the DCT Device */
	pvt->F2 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3);
	if (!pvt->F2) {
		pci_dev_put(pvt->F1);
		pvt->F1 = NULL;

		amd64_err("F2 not found: device 0x%x (broken BIOS?)\n", pci_id2);
		return -ENODEV;
	}

	edac_dbg(1, "F1: %s\n", pci_name(pvt->F1));
	edac_dbg(1, "F2: %s\n", pci_name(pvt->F2));
	edac_dbg(1, "F3: %s\n", pci_name(pvt->F3));

	return 0;
}
static void free_mc_sibling_devs(struct amd64_pvt *pvt)
{
	if (pvt->umc) {
		pci_dev_put(pvt->F0);
		pci_dev_put(pvt->F6);
	} else {
		pci_dev_put(pvt->F1);
		pci_dev_put(pvt->F2);
	}
}
static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
{
	pvt->ecc_sym_sz = 4;

	if (pvt->umc) {
		u8 i;

		for (i = 0; i < NUM_UMCS; i++) {
			/* Check enabled channels only: */
			if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
			    (pvt->umc[i].ecc_ctrl & BIT(7))) {
				pvt->ecc_sym_sz = 8;
				break;
			}
		}

		return;
	}

	if (pvt->fam >= 0x10) {
		u32 tmp;

		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
		/* F16h has only DCT0, so no need to read dbam1. */
		if (pvt->fam != 0x16)
			amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);

		/* F10h, revD and later can do x8 ECC too. */
		if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
			pvt->ecc_sym_sz = 8;
	}
}
/*
 * Retrieve the hardware registers of the memory controller.
 */
static void __read_mc_regs_df(struct amd64_pvt *pvt)
{
	u8 nid = pvt->mc_node_id;
	struct amd64_umc *umc;
	u32 i, umc_base;

	/* Read registers from each UMC */
	for (i = 0; i < NUM_UMCS; i++) {

		umc_base = get_umc_base(i);
		umc = &pvt->umc[i];

		amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
		amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
		amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
		amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
		amd_smn_read(nid, umc_base + UMCCH_UMC_CAP_HI, &umc->umc_cap_hi);
	}
}
/*
 * Retrieve the hardware registers of the memory controller (this includes the
 * 'Address Map' and 'Misc' device regs)
 */
static void read_mc_regs(struct amd64_pvt *pvt)
{
	unsigned int range;
	u64 msr_val;

	/*
	 * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since
	 * those are Read-As-Zero.
	 */
	rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem);
	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);

	/* Check first whether TOP_MEM2 is enabled: */
	rdmsrl(MSR_K8_SYSCFG, msr_val);
	if (msr_val & BIT(21)) {
		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
	} else {
		edac_dbg(0, "  TOP_MEM2 disabled\n");
	}

	if (pvt->umc) {
		__read_mc_regs_df(pvt);
		amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);

		goto skip;
	}

	amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);

	read_dram_ctl_register(pvt);

	for (range = 0; range < DRAM_RANGES; range++) {
		u8 rw;

		/* read settings for this DRAM range */
		read_dram_base_limit_regs(pvt, range);

		rw = dram_rw(pvt, range);
		if (!rw)
			continue;

		edac_dbg(1, "  DRAM range[%d], base: 0x%016llx; limit: 0x%016llx\n",
			 range,
			 get_dram_base(pvt, range),
			 get_dram_limit(pvt, range));

		edac_dbg(1, "   IntlvEn=%s; Range access: %s%s IntlvSel=%d DstNode=%d\n",
			 dram_intlv_en(pvt, range) ? "Enabled" : "Disabled",
			 (rw & 0x1) ? "R" : "-",
			 (rw & 0x2) ? "W" : "-",
			 dram_intlv_sel(pvt, range),
			 dram_dst_node(pvt, range));
	}

	amd64_read_pci_cfg(pvt->F1, DHAR, &pvt->dhar);
	amd64_read_dct_pci_cfg(pvt, 0, DBAM0, &pvt->dbam0);

	amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);

	amd64_read_dct_pci_cfg(pvt, 0, DCLR0, &pvt->dclr0);
	amd64_read_dct_pci_cfg(pvt, 0, DCHR0, &pvt->dchr0);

	if (!dct_ganging_enabled(pvt)) {
		amd64_read_dct_pci_cfg(pvt, 1, DCLR0, &pvt->dclr1);
		amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1);
	}

skip:
	read_dct_base_mask(pvt);

	determine_memory_type(pvt);
	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);

	determine_ecc_sym_sz(pvt);

	dump_misc_regs(pvt);
}
/*
 * NOTE: CPU Revision Dependent code
 *
 * Input:
 *	@csrow_nr	ChipSelect Row Number (0..NUM_CHIPSELECTS-1)
 *	k8 private pointer to -->
 *			DRAM Bank Address mapping register
 *			node_id
 *			DCL register where dual_channel_active is
 *
 * The DBAM register consists of 4 sets of 4 bits each definitions:
 *
 * Bits:	CSROWs
 * 0-3		CSROWs 0 and 1
 * 4-7		CSROWs 2 and 3
 * 8-11		CSROWs 4 and 5
 * 12-15	CSROWs 6 and 7
 *
 * Values range from: 0 to 15
 * The meaning of the values depends on CPU revision and dual-channel state,
 * see the relevant BKDG for more info.
 *
 * The memory controller provides for a total of only 8 CSROWs in its current
 * architecture. Each "pair" of CSROWs normally represents just one DIMM in
 * single channel or two (2) DIMMs in dual channel mode.
 *
 * The following code logic collapses the various tables for CSROW based on CPU
 * revision.
 *
 * Returns:
 *	The number of PAGE_SIZE pages on the specified CSROW number it
 *	encompasses
 */
static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
{
	u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
	int csrow_nr = csrow_nr_orig;
	u32 cs_mode, nr_pages;

	if (!pvt->umc)
		csrow_nr >>= 1;

	cs_mode = DBAM_DIMM(csrow_nr, dbam);

	nr_pages   = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
	nr_pages <<= 20 - PAGE_SHIFT;

	edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
		 csrow_nr_orig, dct, cs_mode);
	edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);

	return nr_pages;
}
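
/*
 * Worked example (assuming the usual 4 KiB PAGE_SIZE): if dbam_to_cs()
 * reports a 2048 MiB chip select, nr_pages <<= 20 - PAGE_SHIFT shifts by 8,
 * giving 2048 << 8 = 524288 pages, i.e. 2 GiB worth of 4 KiB pages per
 * channel.
 */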
/*
 * Initialize the array of csrow attribute instances, based on the values
 * from pci config hardware registers.
 */
static int init_csrows(struct mem_ctl_info *mci)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	enum edac_type edac_mode = EDAC_NONE;
	struct csrow_info *csrow;
	struct dimm_info *dimm;
	int i, j, empty = 1;
	int nr_pages = 0;
	u32 val;

	if (!pvt->umc) {
		amd64_read_pci_cfg(pvt->F3, NBCFG, &val);

		pvt->nbcfg = val;

		edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
			 pvt->mc_node_id, val,
			 !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
	}

	/*
	 * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
	 */
	for_each_chip_select(i, 0, pvt) {
		bool row_dct0 = !!csrow_enabled(i, 0, pvt);
		bool row_dct1 = false;

		if (pvt->fam != 0xf)
			row_dct1 = !!csrow_enabled(i, 1, pvt);

		if (!row_dct0 && !row_dct1)
			continue;

		csrow = mci->csrows[i];
		empty = 0;

		edac_dbg(1, "MC node: %d, csrow: %d\n",
			 pvt->mc_node_id, i);

		if (row_dct0) {
			nr_pages = get_csrow_nr_pages(pvt, 0, i);
			csrow->channels[0]->dimm->nr_pages = nr_pages;
		}

		/* K8 has only one DCT */
		if (pvt->fam != 0xf && row_dct1) {
			int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i);

			csrow->channels[1]->dimm->nr_pages = row_dct1_pages;
			nr_pages += row_dct1_pages;
		}

		edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);

		/* Determine DIMM ECC mode: */
		if (pvt->umc) {
			if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED)
				edac_mode = EDAC_S4ECD4ED;
			else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED)
				edac_mode = EDAC_SECDED;

		} else if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL)
					? EDAC_S4ECD4ED
					: EDAC_SECDED;
		}

		for (j = 0; j < pvt->channel_count; j++) {
			dimm = csrow->channels[j]->dimm;
			dimm->mtype = pvt->dram_type;
			dimm->edac_mode = edac_mode;
		}
	}

	return empty;
}
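
/*
 * Note on the return value: init_csrows() returns 1 ("empty") when no chip
 * select row is enabled on this node; init_one_instance() below then drops
 * mci->edac_cap to EDAC_FLAG_NONE.
 */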
/* get all cores on this DCT */
static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
{
	int cpu;

	for_each_online_cpu(cpu)
		if (amd_get_nb_id(cpu) == nid)
			cpumask_set_cpu(cpu, mask);
}
/* check MCG_CTL on all the cpus on this node */
static bool nb_mce_bank_enabled_on_node(u16 nid)
{
	cpumask_var_t mask;
	int cpu, nbe;
	bool ret = false;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
		amd64_warn("%s: Error allocating mask\n", __func__);
		return false;
	}

	get_cpus_on_this_dct_cpumask(mask, nid);

	rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);

	for_each_cpu(cpu, mask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);
		nbe = reg->l & MSR_MCGCTL_NBE;

		edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
			 cpu, reg->q,
			 (nbe ? "enabled" : "disabled"));

		if (!nbe)
			goto out;
	}
	ret = true;

out:
	free_cpumask_var(mask);
	return ret;
}
static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
{
	cpumask_var_t cmask;
	int cpu;

	if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
		amd64_warn("%s: error allocating mask\n", __func__);
		return -ENOMEM;
	}

	get_cpus_on_this_dct_cpumask(cmask, nid);

	rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);

	for_each_cpu(cpu, cmask) {

		struct msr *reg = per_cpu_ptr(msrs, cpu);

		if (on) {
			if (reg->l & MSR_MCGCTL_NBE)
				s->flags.nb_mce_enable = 1;

			reg->l |= MSR_MCGCTL_NBE;
		} else {
			/*
			 * Turn off NB MCE reporting only when it was off before
			 */
			if (!s->flags.nb_mce_enable)
				reg->l &= ~MSR_MCGCTL_NBE;
		}
	}
	wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);

	free_cpumask_var(cmask);

	return 0;
}
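
/*
 * toggle_ecc_err_reporting() is called with ON from
 * enable_ecc_error_reporting() and with OFF from
 * restore_ecc_error_reporting() below, bracketing the driver's takeover of
 * the NB MCE bank.
 */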
static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
				       struct pci_dev *F3)
{
	bool ret = true;
	u32 value, mask = 0x3;		/* UECC/CECC enable */

	if (toggle_ecc_err_reporting(s, nid, ON)) {
		amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
		return false;
	}

	amd64_read_pci_cfg(F3, NBCTL, &value);

	s->old_nbctl   = value & mask;
	s->nbctl_valid = true;

	value |= mask;
	amd64_write_pci_cfg(F3, NBCTL, value);

	amd64_read_pci_cfg(F3, NBCFG, &value);

	edac_dbg(0, "1: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
		 nid, value, !!(value & NBCFG_ECC_ENABLE));

	if (!(value & NBCFG_ECC_ENABLE)) {
		amd64_warn("DRAM ECC disabled on this node, enabling...\n");

		s->flags.nb_ecc_prev = 0;

		/* Attempt to turn on DRAM ECC Enable */
		value |= NBCFG_ECC_ENABLE;
		amd64_write_pci_cfg(F3, NBCFG, value);

		amd64_read_pci_cfg(F3, NBCFG, &value);

		if (!(value & NBCFG_ECC_ENABLE)) {
			amd64_warn("Hardware rejected DRAM ECC enable, "
				   "check memory DIMM configuration.\n");
			ret = false;
		} else {
			amd64_info("Hardware accepted DRAM ECC Enable\n");
		}
	} else {
		s->flags.nb_ecc_prev = 1;
	}

	edac_dbg(0, "2: node %d, NBCFG=0x%08x[DramEccEn: %d]\n",
		 nid, value, !!(value & NBCFG_ECC_ENABLE));

	return ret;
}
static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
					struct pci_dev *F3)
{
	u32 value, mask = 0x3;		/* UECC/CECC enable */

	if (!s->nbctl_valid)
		return;

	amd64_read_pci_cfg(F3, NBCTL, &value);
	value &= ~mask;
	value |= s->old_nbctl;

	amd64_write_pci_cfg(F3, NBCTL, value);

	/* restore previous BIOS DRAM ECC "off" setting we force-enabled */
	if (!s->flags.nb_ecc_prev) {
		amd64_read_pci_cfg(F3, NBCFG, &value);
		value &= ~NBCFG_ECC_ENABLE;
		amd64_write_pci_cfg(F3, NBCFG, value);
	}

	/* restore the NB Enable MCGCTL bit */
	if (toggle_ecc_err_reporting(s, nid, OFF))
		amd64_warn("Error restoring NB MCGCTL settings!\n");
}
/*
 * EDAC requires that the BIOS have ECC enabled before
 * taking over the processing of ECC errors. A command line
 * option allows us to force-enable hardware ECC later in
 * enable_ecc_error_reporting().
 */
static const char *ecc_msg =
	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
	" Either enable ECC checking or force module loading by setting "
	"'ecc_enable_override'.\n"
	" (Note that use of the override may cause unknown side effects.)\n";

static bool ecc_enabled(struct pci_dev *F3, u16 nid)
{
	bool nb_mce_en = false;
	u8 ecc_en = 0, i;
	u32 value;

	if (boot_cpu_data.x86 >= 0x17) {
		u8 umc_en_mask = 0, ecc_en_mask = 0;

		for (i = 0; i < NUM_UMCS; i++) {
			u32 base = get_umc_base(i);

			/* Only check enabled UMCs. */
			if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
				continue;

			if (!(value & UMC_SDP_INIT))
				continue;

			umc_en_mask |= BIT(i);

			if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
				continue;

			if (value & UMC_ECC_ENABLED)
				ecc_en_mask |= BIT(i);
		}

		/* Check whether at least one UMC is enabled: */
		if (umc_en_mask)
			ecc_en = umc_en_mask == ecc_en_mask;
		else
			edac_dbg(0, "Node %d: No enabled UMCs.\n", nid);

		/* Assume UMC MCA banks are enabled. */
		nb_mce_en = true;
	} else {
		amd64_read_pci_cfg(F3, NBCFG, &value);

		ecc_en = !!(value & NBCFG_ECC_ENABLE);

		nb_mce_en = nb_mce_bank_enabled_on_node(nid);
		if (!nb_mce_en)
			edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
				 MSR_IA32_MCG_CTL, nid);
	}

	amd64_info("Node %d: DRAM ECC %s.\n",
		   nid, (ecc_en ? "enabled" : "disabled"));

	if (!ecc_en || !nb_mce_en) {
		amd64_info("%s", ecc_msg);
		return false;
	}
	return true;
}
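
/*
 * Note on the Fam17h+ check above: ECC is reported as enabled only when every
 * enabled UMC has it on, i.e. umc_en_mask must equal ecc_en_mask. For
 * example, umc_en_mask = 0x3 with ecc_en_mask = 0x1 (only one of two enabled
 * UMCs running ECC) leaves ecc_en at 0, and the module refuses to load unless
 * ecc_enable_override is set.
 */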
static inline void
f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
{
	u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1;

	for (i = 0; i < NUM_UMCS; i++) {
		if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
			ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
			cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);

			dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6));
			dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7));
		}
	}

	/* Set chipkill only if ECC is enabled: */
	if (ecc_en) {
		mci->edac_ctl_cap |= EDAC_FLAG_SECDED;

		if (!cpk_en)
			return;

		if (dev_x4)
			mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
		else if (dev_x16)
			mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED;
		else
			mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED;
	}
}
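
/*
 * The DIMM device width deduced above comes from UMCCH_DIMM_CFG: BIT(6) set
 * on every initialized UMC means x4 devices, BIT(7) means x16, and neither
 * bit leaves the default of x8, which selects between the S4ECD4ED,
 * S16ECD16ED and S8ECD8ED chipkill flags.
 */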
static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
				 struct amd64_family_type *fam)
{
	struct amd64_pvt *pvt = mci->pvt_info;

	mci->mtype_cap		= MEM_FLAG_DDR2 | MEM_FLAG_RDDR2;
	mci->edac_ctl_cap	= EDAC_FLAG_NONE;

	if (pvt->umc) {
		f17h_determine_edac_ctl_cap(mci, pvt);
	} else {
		if (pvt->nbcap & NBCAP_SECDED)
			mci->edac_ctl_cap |= EDAC_FLAG_SECDED;

		if (pvt->nbcap & NBCAP_CHIPKILL)
			mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
	}

	mci->edac_cap		= determine_edac_cap(pvt);
	mci->mod_name		= EDAC_MOD_STR;
	mci->ctl_name		= fam->ctl_name;
	mci->dev_name		= pci_name(pvt->F3);
	mci->ctl_page_to_phys	= NULL;

	/* memory scrubber interface */
	mci->set_sdram_scrub_rate = set_scrub_rate;
	mci->get_sdram_scrub_rate = get_scrub_rate;
}
/*
 * returns a pointer to the family descriptor on success, NULL otherwise.
 */
static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
{
	struct amd64_family_type *fam_type = NULL;

	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
	pvt->stepping	= boot_cpu_data.x86_stepping;
	pvt->model	= boot_cpu_data.x86_model;
	pvt->fam	= boot_cpu_data.x86;

	switch (pvt->fam) {
	case 0xf:
		fam_type	= &family_types[K8_CPUS];
		pvt->ops	= &family_types[K8_CPUS].ops;
		break;

	case 0x10:
		fam_type	= &family_types[F10_CPUS];
		pvt->ops	= &family_types[F10_CPUS].ops;
		break;

	case 0x15:
		if (pvt->model == 0x30) {
			fam_type = &family_types[F15_M30H_CPUS];
			pvt->ops = &family_types[F15_M30H_CPUS].ops;
			break;
		} else if (pvt->model == 0x60) {
			fam_type = &family_types[F15_M60H_CPUS];
			pvt->ops = &family_types[F15_M60H_CPUS].ops;
			break;
		}

		fam_type	= &family_types[F15_CPUS];
		pvt->ops	= &family_types[F15_CPUS].ops;
		break;

	case 0x16:
		if (pvt->model == 0x30) {
			fam_type = &family_types[F16_M30H_CPUS];
			pvt->ops = &family_types[F16_M30H_CPUS].ops;
			break;
		}
		fam_type	= &family_types[F16_CPUS];
		pvt->ops	= &family_types[F16_CPUS].ops;
		break;

	case 0x17:
		if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
			fam_type = &family_types[F17_M10H_CPUS];
			pvt->ops = &family_types[F17_M10H_CPUS].ops;
			break;
		}
		fam_type	= &family_types[F17_CPUS];
		pvt->ops	= &family_types[F17_CPUS].ops;
		break;

	default:
		amd64_err("Unsupported family!\n");
		return NULL;
	}

	amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
		   (pvt->fam == 0xf ?
			(pvt->ext_model >= K8_REV_F ? "revF or later "
						    : "revE or earlier ")
			: ""), pvt->mc_node_id);

	return fam_type;
}
static const struct attribute_group *amd64_edac_attr_groups[] = {
#ifdef CONFIG_EDAC_DEBUG
	&amd64_edac_dbg_group,
#endif
#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
	&amd64_edac_inj_group,
#endif
	NULL
};
static int init_one_instance(unsigned int nid)
{
	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
	struct amd64_family_type *fam_type = NULL;
	struct mem_ctl_info *mci = NULL;
	struct edac_mc_layer layers[2];
	struct amd64_pvt *pvt = NULL;
	u16 pci_id1, pci_id2;
	int err = 0, ret;

	ret = -ENOMEM;
	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
	if (!pvt)
		goto err_ret;

	pvt->mc_node_id	= nid;
	pvt->F3 = F3;

	ret = -EINVAL;
	fam_type = per_family_init(pvt);
	if (!fam_type)
		goto err_free;

	if (pvt->fam >= 0x17) {
		pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
		if (!pvt->umc) {
			ret = -ENOMEM;
			goto err_free;
		}

		pci_id1 = fam_type->f0_id;
		pci_id2 = fam_type->f6_id;
	} else {
		pci_id1 = fam_type->f1_id;
		pci_id2 = fam_type->f2_id;
	}

	err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
	if (err)
		goto err_post_init;

	read_mc_regs(pvt);

	/*
	 * We need to determine how many memory channels there are. Then use
	 * that information for calculating the size of the dynamic instance
	 * tables in the 'mci' structure.
	 */
	ret = -EINVAL;
	pvt->channel_count = pvt->ops->early_channel_count(pvt);
	if (pvt->channel_count < 0)
		goto err_siblings;

	ret = -ENOMEM;
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = pvt->csels[0].b_cnt;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;

	/*
	 * Always allocate two channels since we can have setups with DIMMs on
	 * only one channel. Also, this simplifies handling later for the price
	 * of a couple of KBs tops.
	 */
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;

	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
	if (!mci)
		goto err_siblings;

	mci->pvt_info = pvt;
	mci->pdev = &pvt->F3->dev;

	setup_mci_misc_attrs(mci, fam_type);

	if (init_csrows(mci))
		mci->edac_cap = EDAC_FLAG_NONE;

	ret = -ENODEV;
	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
		edac_dbg(1, "failed edac_mc_add_mc()\n");
		goto err_add_mc;
	}

	return 0;

err_add_mc:
	edac_mc_free(mci);

err_siblings:
	free_mc_sibling_devs(pvt);

err_post_init:
	if (pvt->fam >= 0x17)
		kfree(pvt->umc);

err_free:
	kfree(pvt);

err_ret:
	return ret;
}
static int probe_one_instance(unsigned int nid)
{
	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
	struct ecc_settings *s;
	int ret;

	ret = -ENOMEM;
	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
	if (!s)
		goto err_out;

	ecc_stngs[nid] = s;

	if (!ecc_enabled(F3, nid)) {
		ret = 0;

		if (!ecc_enable_override)
			goto err_enable;

		if (boot_cpu_data.x86 >= 0x17) {
			amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS.");
			goto err_enable;
		} else
			amd64_warn("Forcing ECC on!\n");

		if (!enable_ecc_error_reporting(s, nid, F3))
			goto err_enable;
	}

	ret = init_one_instance(nid);
	if (ret < 0) {
		amd64_err("Error probing instance: %d\n", nid);

		if (boot_cpu_data.x86 < 0x17)
			restore_ecc_error_reporting(s, nid, F3);

		goto err_enable;
	}

	return ret;

err_enable:
	kfree(s);
	ecc_stngs[nid] = NULL;

err_out:
	return ret;
}
static void remove_one_instance(unsigned int nid)
{
	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
	struct ecc_settings *s = ecc_stngs[nid];
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;

	mci = find_mci_by_dev(&F3->dev);
	WARN_ON(!mci);

	/* Remove from EDAC CORE tracking list */
	mci = edac_mc_del_mc(&F3->dev);
	if (!mci)
		return;

	pvt = mci->pvt_info;

	restore_ecc_error_reporting(s, nid, F3);

	free_mc_sibling_devs(pvt);

	kfree(ecc_stngs[nid]);
	ecc_stngs[nid] = NULL;

	/* Free the EDAC CORE resources */
	mci->pvt_info = NULL;

	kfree(pvt);
	edac_mc_free(mci);
}
static void setup_pci_device(void)
{
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;

	if (pci_ctl)
		return;

	mci = edac_mc_find(0);
	if (!mci)
		return;

	pvt = mci->pvt_info;
	if (pvt->umc)
		pci_ctl = edac_pci_create_generic_ctl(&pvt->F0->dev, EDAC_MOD_STR);
	else
		pci_ctl = edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
	if (!pci_ctl) {
		pr_warn("%s(): Unable to create PCI control\n", __func__);
		pr_warn("%s(): PCI error report via EDAC not set\n", __func__);
	}
}
static const struct x86_cpu_id amd64_cpuids[] = {
	{ X86_VENDOR_AMD, 0xF,  X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
	{ X86_VENDOR_AMD, 0x10, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
	{ X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
	{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
	{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
	{ }
};
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
static int __init amd64_edac_init(void)
{
	const char *owner;
	int err = -ENODEV;
	int i;

	owner = edac_get_owner();
	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
		return -EBUSY;

	if (!x86_match_cpu(amd64_cpuids))
		return -ENODEV;

	if (amd_cache_northbridges() < 0)
		return -ENODEV;

	opstate_init();

	err = -ENOMEM;
	ecc_stngs = kcalloc(amd_nb_num(), sizeof(ecc_stngs[0]), GFP_KERNEL);
	if (!ecc_stngs)
		goto err_free;

	msrs = msrs_alloc();
	if (!msrs)
		goto err_free;

	for (i = 0; i < amd_nb_num(); i++) {
		err = probe_one_instance(i);
		if (err) {
			/* unwind properly */
			while (--i >= 0)
				remove_one_instance(i);

			goto err_pci;
		}
	}

	if (!edac_has_mcs()) {
		err = -ENODEV;
		goto err_pci;
	}

	/* register stuff with EDAC MCE */
	if (report_gart_errors)
		amd_report_gart_errors(true);

	if (boot_cpu_data.x86 >= 0x17)
		amd_register_ecc_decoder(decode_umc_error);
	else
		amd_register_ecc_decoder(decode_bus_error);

	setup_pci_device();

#ifdef CONFIG_X86_32
	amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
#endif

	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);

	return 0;

err_pci:
	msrs_free(msrs);
	msrs = NULL;

err_free:
	kfree(ecc_stngs);
	ecc_stngs = NULL;

	return err;
}
static void __exit amd64_edac_exit(void)
{
	int i;

	if (pci_ctl)
		edac_pci_release_generic_ctl(pci_ctl);

	/* unregister from EDAC MCE */
	amd_report_gart_errors(false);

	if (boot_cpu_data.x86 >= 0x17)
		amd_unregister_ecc_decoder(decode_umc_error);
	else
		amd_unregister_ecc_decoder(decode_bus_error);

	for (i = 0; i < amd_nb_num(); i++)
		remove_one_instance(i);

	kfree(ecc_stngs);
	ecc_stngs = NULL;

	msrs_free(msrs);
	msrs = NULL;
}
module_init(amd64_edac_init);
module_exit(amd64_edac_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, "
	      "Dave Peterson, Thayne Harbaugh");
MODULE_DESCRIPTION("MC support for AMD64 memory controllers - "
		   EDAC_AMD64_VERSION);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");