2 * Intel 7300 class Memory Controllers kernel module (Clarksboro)
4 * This file may be distributed under the terms of the
5 * GNU General Public License version 2 only.
7 * Copyright (c) 2010 by:
8 * Mauro Carvalho Chehab
10 * Red Hat Inc. http://www.redhat.com
12 * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet
13 * http://www.intel.com/Assets/PDF/datasheet/318082.pdf
15 * TODO: The chipset allows checking for PCI Express errors also. Currently,
16 * the driver covers only memory errors
18 * This driver uses "csrows" EDAC attribute to represent DIMM slot#
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/pci.h>
24 #include <linux/pci_ids.h>
25 #include <linux/slab.h>
26 #include <linux/edac.h>
27 #include <linux/mmzone.h>
29 #include "edac_core.h"
32 * Alter this version for the I7300 module when modifications are made
34 #define I7300_REVISION " Ver: 1.0.0"
36 #define EDAC_MOD_STR "i7300_edac"
38 #define i7300_printk(level, fmt, arg...) \
39 edac_printk(level, "i7300", fmt, ##arg)
41 #define i7300_mc_printk(mci, level, fmt, arg...) \
42 edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg)
44 /***********************************************
45 * i7300 Limit constants Structs and static vars
46 ***********************************************/
49 * Memory topology is organized as:
50 * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0)
51 * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0)
52 * Each channel can have up to 8 DIMM sets (called as SLOTS)
53 * Slots should generally be filled in pairs
54 * Except on Single Channel mode of operation
55 * just slot 0/channel0 filled on this mode
56 * On normal operation mode, the two channels on a branch should be
57 * filled together for the same SLOT#
58 * When in mirrored mode, Branch 1 replicate memory at Branch 0, so, the four
59 * channels on both branches should be filled
62 /* Limits for i7300 */
64 #define MAX_BRANCHES 2
65 #define MAX_CH_PER_BRANCH 2
66 #define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES)
/*
 * to_channel() - combine a branch number and a channel-within-branch number
 * into a single channel index: branch is shifted left by one bit and ch is
 * OR-ed into the low bit position.
 */
69 #define to_channel(ch, branch) ((((branch)) << 1) | (ch))
/*
 * to_csrow() - build a csrow index: the to_channel() value occupies the low
 * bits and the slot number is OR-ed in starting at bit 2.
 */
71 #define to_csrow(slot, ch, branch) \
72 (to_channel(ch, branch) | ((slot) << 2))
74 /* Device name and register DID (Device ID) */
75 struct i7300_dev_info
{
76 const char *ctl_name
; /* name for this device */
77 u16 fsb_mapping_errors
; /* DID for the branchmap,control */
80 /* Table of devices attributes supported by this driver */
81 static const struct i7300_dev_info i7300_devs
[] = {
84 .fsb_mapping_errors
= PCI_DEVICE_ID_INTEL_I7300_MCH_ERR
,
88 struct i7300_dimm_info
{
89 int megabytes
; /* size, 0 means not present */
92 /* driver private data structure */
94 struct pci_dev
*pci_dev_16_0_fsb_ctlr
; /* 16.0 */
95 struct pci_dev
*pci_dev_16_1_fsb_addr_map
; /* 16.1 */
96 struct pci_dev
*pci_dev_16_2_fsb_err_regs
; /* 16.2 */
97 struct pci_dev
*pci_dev_2x_0_fbd_branch
[MAX_BRANCHES
]; /* 21.0 and 22.0 */
99 u16 tolm
; /* top of low memory */
100 u64 ambase
; /* AMB BAR */
102 u32 mc_settings
; /* Report several settings */
105 u16 mir
[MAX_MIR
]; /* Memory Interleave Reg*/
107 u16 mtr
[MAX_SLOTS
][MAX_BRANCHES
]; /* Memory Technlogy Reg */
108 u16 ambpresent
[MAX_CHANNELS
]; /* AMB present regs */
110 /* DIMM information matrix, allocating architecture maximums */
111 struct i7300_dimm_info dimm_info
[MAX_SLOTS
][MAX_CHANNELS
];
113 /* Temporary buffer for use when preparing error messages */
114 char *tmp_prt_buffer
;
117 /* FIXME: Why do we need to have this static? */
118 static struct edac_pci_ctl_info
*i7300_pci
;
120 /***************************************************
121 * i7300 Register definitions for memory enumeration
122 ***************************************************/
126 * Function 0: System Address (not documented)
127 * Function 1: Memory Branch Map, Control, Errors Register
130 /* OFFSETS for Function 0 */
131 #define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */
132 #define MAXCH 0x56 /* Max Channel Number */
133 #define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */
135 /* OFFSETS for Function 1 */
/*
 * MC_SETTINGS register offset, plus helpers that test individual flag bits
 * of the 32-bit value read from it. Each helper evaluates to a non-zero
 * value when the corresponding bit is set and to 0 otherwise.
 *
 * Unsigned constants are used on purpose: (1 << 31) shifts into the sign
 * bit of a signed int, which is undefined behaviour in C.
 */
#define MC_SETTINGS		0x40
  #define IS_MIRRORED(mc)		((mc) & (1U << 16))
  #define IS_ECC_ENABLED(mc)		((mc) & (1U << 5))
  #define IS_RETRY_ENABLED(mc)		((mc) & (1U << 31))
  #define IS_SCRBALGO_ENHANCED(mc)	((mc) & (1U << 8))
142 #define MC_SETTINGS_A 0x58
143 #define IS_SINGLE_MODE(mca) ((mca) & (1 << 14))
152 * Note: Other Intel EDAC drivers use AMBPRESENT to identify the available
153 * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it
154 * seems that we cannot use this information directly for the same usage.
155 * Each memory slot may have up to 2 AMB interfaces, one for income and another
156 * for outcome interface to the next slot.
157 * For now, the driver just stores the AMB present registers, but rely only at
158 * the MTR info to detect memory.
159 * Datasheet is also not clear about how to map each AMBPRESENT registers to
160 * one of the 4 available channels.
162 #define AMBPRESENT_0 0x64
163 #define AMBPRESENT_1 0x66
165 static const u16 mtr_regs
[MAX_SLOTS
] = {
166 0x80, 0x84, 0x88, 0x8c,
167 0x82, 0x86, 0x8a, 0x8e
171 * Defines to extract the various fields from the
172 * MTRx - Memory Technology Registers
/* Bit 8: non-zero when DIMMs are present in the slot described by this MTR */
174 #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8))
/* Bit 7: non-zero when electrical throttling is enabled */
175 #define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7))
/* Bit 6: DRAM device width, 8 when set and 4 when clear */
176 #define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4)
/* Bit 5: number of DRAM banks, 8 when set and 4 when clear */
177 #define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4)
/* Bit 4: 1 when set, 0 when clear; also added as-is to the address bit count */
178 #define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0)
/* Bits 3:2: row-count code (0..3) */
179 #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3)
/* Fixed number of address bits contributed by the bank selection */
180 #define MTR_DRAM_BANKS_ADDR_BITS 2
/* Row address bits: the row-count code plus 13 */
181 #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13)
/* Bits 1:0: column-count code (0..3) */
182 #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
/* Column address bits: the column-count code plus 10 */
183 #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
185 /************************************************
186 * i7300 Register definitions for error detection
187 ************************************************/
190 * Device 16.1: FBD Error Registers
192 #define FERR_FAT_FBD 0x98
193 static const char *ferr_fat_fbd_name
[] = {
194 [22] = "Non-Redundant Fast Reset Timeout",
195 [2] = ">Tmid Thermal event with intelligent throttling disabled",
196 [1] = "Memory or FBD configuration CRC read error",
197 [0] = "Memory Write error on non-redundant retry or "
198 "FBD configuration Write error on retry",
200 #define GET_FBD_FAT_IDX(fbderr) (((fbderr) >> 28) & 3)
201 #define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22))
203 #define FERR_NF_FBD 0xa0
204 static const char *ferr_nf_fbd_name
[] = {
205 [24] = "DIMM-Spare Copy Completed",
206 [23] = "DIMM-Spare Copy Initiated",
207 [22] = "Redundant Fast Reset Timeout",
208 [21] = "Memory Write error on redundant retry",
209 [18] = "SPD protocol Error",
210 [17] = "FBD Northbound parity error on FBD Sync Status",
211 [16] = "Correctable Patrol Data ECC",
212 [15] = "Correctable Resilver- or Spare-Copy Data ECC",
213 [14] = "Correctable Mirrored Demand Data ECC",
214 [13] = "Correctable Non-Mirrored Demand Data ECC",
215 [11] = "Memory or FBD configuration CRC read error",
216 [10] = "FBD Configuration Write error on first attempt",
217 [9] = "Memory Write error on first attempt",
218 [8] = "Non-Aliased Uncorrectable Patrol Data ECC",
219 [7] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
220 [6] = "Non-Aliased Uncorrectable Mirrored Demand Data ECC",
221 [5] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
222 [4] = "Aliased Uncorrectable Patrol Data ECC",
223 [3] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
224 [2] = "Aliased Uncorrectable Mirrored Demand Data ECC",
225 [1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
226 [0] = "Uncorrectable Data ECC on Replay",
228 #define GET_FBD_NF_IDX(fbderr) (((fbderr) >> 28) & 3)
229 #define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\
230 (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\
231 (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\
232 (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\
233 (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\
236 #define EMASK_FBD 0xa8
237 #define EMASK_FBD_ERR_MASK ((1 << 27) | (1 << 26) | (1 << 25) | (1 << 24) |\
238 (1 << 22) | (1 << 21) | (1 << 20) | (1 << 19) |\
239 (1 << 18) | (1 << 17) | (1 << 16) | (1 << 14) |\
240 (1 << 13) | (1 << 12) | (1 << 11) | (1 << 10) |\
241 (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\
242 (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\
246 * Device 16.2: Global Error Registers
249 #define FERR_GLOBAL_HI 0x48
250 static const char *ferr_global_hi_name
[] = {
251 [3] = "FSB 3 Fatal Error",
252 [2] = "FSB 2 Fatal Error",
253 [1] = "FSB 1 Fatal Error",
254 [0] = "FSB 0 Fatal Error",
256 #define ferr_global_hi_is_fatal(errno) 1
258 #define FERR_GLOBAL_LO 0x40
/*
 * Human-readable names for the bits of the FERR_GLOBAL_LO register, indexed
 * by bit number. Bits 16..31 are the fatal errors and bits 0..15 are the
 * non-fatal ones (see ferr_global_lo_is_fatal()).
 */
static const char *ferr_global_lo_name[] = {
	[31] = "Internal MCH Fatal Error",
	[30] = "Intel QuickData Technology Device Fatal Error",
	[29] = "FSB1 Fatal Error",
	[28] = "FSB0 Fatal Error",
	[27] = "FBD Channel 3 Fatal Error",
	[26] = "FBD Channel 2 Fatal Error",
	[25] = "FBD Channel 1 Fatal Error",
	[24] = "FBD Channel 0 Fatal Error",
	/* Was "Device 7Fatal Error": missing space in the emitted message */
	[23] = "PCI Express Device 7 Fatal Error",
	[22] = "PCI Express Device 6 Fatal Error",
	[21] = "PCI Express Device 5 Fatal Error",
	[20] = "PCI Express Device 4 Fatal Error",
	[19] = "PCI Express Device 3 Fatal Error",
	[18] = "PCI Express Device 2 Fatal Error",
	[17] = "PCI Express Device 1 Fatal Error",
	[16] = "ESI Fatal Error",
	[15] = "Internal MCH Non-Fatal Error",
	[14] = "Intel QuickData Technology Device Non Fatal Error",
	[13] = "FSB1 Non-Fatal Error",
	[12] = "FSB 0 Non-Fatal Error",
	[11] = "FBD Channel 3 Non-Fatal Error",
	[10] = "FBD Channel 2 Non-Fatal Error",
	[9]  = "FBD Channel 1 Non-Fatal Error",
	[8]  = "FBD Channel 0 Non-Fatal Error",
	[7]  = "PCI Express Device 7 Non-Fatal Error",
	[6]  = "PCI Express Device 6 Non-Fatal Error",
	[5]  = "PCI Express Device 5 Non-Fatal Error",
	[4]  = "PCI Express Device 4 Non-Fatal Error",
	[3]  = "PCI Express Device 3 Non-Fatal Error",
	[2]  = "PCI Express Device 2 Non-Fatal Error",
	[1]  = "PCI Express Device 1 Non-Fatal Error",
	[0]  = "ESI Non-Fatal Error",
};
/*
 * ferr_global_lo_is_fatal() - classify a FERR_GLOBAL_LO bit number.
 * Evaluates to 0 for bit numbers below 16 and to 1 otherwise.
 * The argument is parenthesised so that expressions such as (a | b) are
 * compared as a whole instead of being split by operator precedence.
 */
#define ferr_global_lo_is_fatal(errnum)	(((errnum) < 16) ? 0 : 1)
/*
 * NRECMEMA: register offset and field extractors for the 16-bit value read
 * from it (bank in bits 14:12, rank in bits 11:8).
 */
#define NRECMEMA		0xbe
  #define NRECMEMA_BANK(v)	(((v) >> 12) & 7)
  #define NRECMEMA_RANK(v)	(((v) >> 8) & 15)

/*
 * NRECMEMB: register offset and field extractors for the 32-bit value read
 * from it (write flag in bit 31, CAS in bits 28:16, RAS in bits 15:0).
 * The write flag uses an unsigned constant because (1 << 31) on a signed
 * int is undefined behaviour.
 */
#define NRECMEMB		0xc0
  #define NRECMEMB_IS_WR(v)	((v) & (1U << 31))
  #define NRECMEMB_CAS(v)	(((v) >> 16) & 0x1fff)
  #define NRECMEMB_RAS(v)	((v) & 0xffff)
/*
 * IS_SECOND_CH() - non-zero when bit 17 of @v is set.
 * The test must be a bitwise AND: multiplying by (1 << 17) made the macro
 * evaluate as true for nearly every non-zero value, so the caller's
 * "second channel" branch was taken regardless of bit 17.
 */
  #define IS_SECOND_CH(v)	((v) & (1 << 17))
/*
 * Field extractors for the 16-bit RECMEMA value
 * (bank in bits 14:12, rank in bits 11:8).
 */
  #define RECMEMA_BANK(v)	(((v) >> 12) & 7)
  #define RECMEMA_RANK(v)	(((v) >> 8) & 15)

/*
 * Field extractors for the 32-bit RECMEMB value
 * (write flag in bit 31, CAS in bits 28:16, RAS in bits 15:0).
 * The write flag uses an unsigned constant because (1 << 31) on a signed
 * int is undefined behaviour.
 */
  #define RECMEMB_IS_WR(v)	((v) & (1U << 31))
  #define RECMEMB_CAS(v)	(((v) >> 16) & 0x1fff)
  #define RECMEMB_RAS(v)	((v) & 0xffff)
318 /********************************************
319 * i7300 Functions related to error detection
320 ********************************************/
323 * get_err_from_table() - Gets the error message from a table
324 * @table: table name (array of char *)
325 * @size: number of elements at the table
326 * @pos: position of the element to be returned
328 * This is a small routine that gets the pos-th element of a table. If the
329 * element doesn't exist (or it is empty), it returns "reserved".
330 * Instead of calling it directly, the better is to call via the macro
331 * GET_ERR_FROM_TABLE(), that automatically checks the table size via
334 static const char *get_err_from_table(const char *table
[], int size
, int pos
)
336 if (unlikely(pos
>= size
))
339 if (unlikely(!table
[pos
]))
345 #define GET_ERR_FROM_TABLE(table, pos) \
346 get_err_from_table(table, ARRAY_SIZE(table), pos)
349 * i7300_process_error_global() - Retrieve the hardware error information from
350 * the hardware global error registers and
352 * @mci: struct mem_ctl_info pointer
354 static void i7300_process_error_global(struct mem_ctl_info
*mci
)
356 struct i7300_pvt
*pvt
;
357 u32 errnum
, error_reg
;
358 unsigned long errors
;
359 const char *specific
;
364 /* read in the 1st FATAL error register */
365 pci_read_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
366 FERR_GLOBAL_HI
, &error_reg
);
367 if (unlikely(error_reg
)) {
369 errnum
= find_first_bit(&errors
,
370 ARRAY_SIZE(ferr_global_hi_name
));
371 specific
= GET_ERR_FROM_TABLE(ferr_global_hi_name
, errnum
);
372 is_fatal
= ferr_global_hi_is_fatal(errnum
);
374 /* Clear the error bit */
375 pci_write_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
376 FERR_GLOBAL_HI
, error_reg
);
381 pci_read_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
382 FERR_GLOBAL_LO
, &error_reg
);
383 if (unlikely(error_reg
)) {
385 errnum
= find_first_bit(&errors
,
386 ARRAY_SIZE(ferr_global_lo_name
));
387 specific
= GET_ERR_FROM_TABLE(ferr_global_lo_name
, errnum
);
388 is_fatal
= ferr_global_lo_is_fatal(errnum
);
390 /* Clear the error bit */
391 pci_write_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
392 FERR_GLOBAL_LO
, error_reg
);
399 i7300_mc_printk(mci
, KERN_EMERG
, "%s misc error: %s\n",
400 is_fatal
? "Fatal" : "NOT fatal", specific
);
404 * i7300_process_fbd_error() - Retrieve the hardware error information from
405 * the FBD error registers and sends it via
406 * EDAC error API calls
407 * @mci: struct mem_ctl_info pointer
409 static void i7300_process_fbd_error(struct mem_ctl_info
*mci
)
411 struct i7300_pvt
*pvt
;
412 u32 errnum
, value
, error_reg
;
414 unsigned branch
, channel
, bank
, rank
, cas
, ras
;
417 unsigned long errors
;
418 const char *specific
;
423 /* read in the 1st FATAL error register */
424 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
425 FERR_FAT_FBD
, &error_reg
);
426 if (unlikely(error_reg
& FERR_FAT_FBD_ERR_MASK
)) {
427 errors
= error_reg
& FERR_FAT_FBD_ERR_MASK
;
428 errnum
= find_first_bit(&errors
,
429 ARRAY_SIZE(ferr_fat_fbd_name
));
430 specific
= GET_ERR_FROM_TABLE(ferr_fat_fbd_name
, errnum
);
431 branch
= (GET_FBD_FAT_IDX(error_reg
) == 2) ? 1 : 0;
433 pci_read_config_word(pvt
->pci_dev_16_1_fsb_addr_map
,
435 bank
= NRECMEMA_BANK(val16
);
436 rank
= NRECMEMA_RANK(val16
);
438 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
440 is_wr
= NRECMEMB_IS_WR(value
);
441 cas
= NRECMEMB_CAS(value
);
442 ras
= NRECMEMB_RAS(value
);
444 /* Clean the error register */
445 pci_write_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
446 FERR_FAT_FBD
, error_reg
);
448 snprintf(pvt
->tmp_prt_buffer
, PAGE_SIZE
,
449 "Bank=%d RAS=%d CAS=%d Err=0x%lx (%s))",
450 bank
, ras
, cas
, errors
, specific
);
452 edac_mc_handle_error(HW_EVENT_ERR_FATAL
, mci
, 1, 0, 0, 0,
454 is_wr
? "Write error" : "Read error",
455 pvt
->tmp_prt_buffer
);
459 /* read in the 1st NON-FATAL error register */
460 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
461 FERR_NF_FBD
, &error_reg
);
462 if (unlikely(error_reg
& FERR_NF_FBD_ERR_MASK
)) {
463 errors
= error_reg
& FERR_NF_FBD_ERR_MASK
;
464 errnum
= find_first_bit(&errors
,
465 ARRAY_SIZE(ferr_nf_fbd_name
));
466 specific
= GET_ERR_FROM_TABLE(ferr_nf_fbd_name
, errnum
);
467 branch
= (GET_FBD_NF_IDX(error_reg
) == 2) ? 1 : 0;
469 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
472 pci_read_config_word(pvt
->pci_dev_16_1_fsb_addr_map
,
474 bank
= RECMEMA_BANK(val16
);
475 rank
= RECMEMA_RANK(val16
);
477 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
479 is_wr
= RECMEMB_IS_WR(value
);
480 cas
= RECMEMB_CAS(value
);
481 ras
= RECMEMB_RAS(value
);
483 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
485 channel
= (branch
<< 1);
486 if (IS_SECOND_CH(value
))
489 /* Clear the error bit */
490 pci_write_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
491 FERR_NF_FBD
, error_reg
);
493 /* Form out message */
494 snprintf(pvt
->tmp_prt_buffer
, PAGE_SIZE
,
495 "DRAM-Bank=%d RAS=%d CAS=%d, Err=0x%lx (%s))",
496 bank
, ras
, cas
, errors
, specific
);
498 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED
, mci
, 1, 0, 0,
500 branch
>> 1, channel
% 2, rank
,
501 is_wr
? "Write error" : "Read error",
502 pvt
->tmp_prt_buffer
);
508 * i7300_check_error() - Calls the error checking subroutines
509 * @mci: struct mem_ctl_info pointer
511 static void i7300_check_error(struct mem_ctl_info
*mci
)
513 i7300_process_error_global(mci
);
514 i7300_process_fbd_error(mci
);
518 * i7300_clear_error() - Clears the error registers
519 * @mci: struct mem_ctl_info pointer
521 static void i7300_clear_error(struct mem_ctl_info
*mci
)
523 struct i7300_pvt
*pvt
= mci
->pvt_info
;
526 * All error values are RWC - we need to read and write 1 to the
527 * bit that we want to cleanup
530 /* Clear global error registers */
531 pci_read_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
532 FERR_GLOBAL_HI
, &value
);
533 pci_write_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
534 FERR_GLOBAL_HI
, value
);
536 pci_read_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
537 FERR_GLOBAL_LO
, &value
);
538 pci_write_config_dword(pvt
->pci_dev_16_2_fsb_err_regs
,
539 FERR_GLOBAL_LO
, value
);
541 /* Clear FBD error registers */
542 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
543 FERR_FAT_FBD
, &value
);
544 pci_write_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
545 FERR_FAT_FBD
, value
);
547 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
548 FERR_NF_FBD
, &value
);
549 pci_write_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
554 * i7300_enable_error_reporting() - Enable the memory reporting logic at the
556 * @mci: struct mem_ctl_info pointer
558 static void i7300_enable_error_reporting(struct mem_ctl_info
*mci
)
560 struct i7300_pvt
*pvt
= mci
->pvt_info
;
563 /* Read the FBD Error Mask Register */
564 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
565 EMASK_FBD
, &fbd_error_mask
);
567 /* Enable with a '0' */
568 fbd_error_mask
&= ~(EMASK_FBD_ERR_MASK
);
570 pci_write_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
,
571 EMASK_FBD
, fbd_error_mask
);
574 /************************************************
575 * i7300 Functions related to memory enumeration
576 ************************************************/
579 * decode_mtr() - Decodes the MTR descriptor, filling the edac structs
580 * @pvt: pointer to the private data struct used by i7300 driver
581 * @slot: DIMM slot (0 to 7)
582 * @ch: Channel number within the branch (0 or 1)
583 * @branch: Branch number (0 or 1)
584 * @dinfo: Pointer to DIMM info where dimm size is stored
585 * @dimm: Pointer to the struct dimm_info that corresponds to that element
587 static int decode_mtr(struct i7300_pvt
*pvt
,
588 int slot
, int ch
, int branch
,
589 struct i7300_dimm_info
*dinfo
,
590 struct dimm_info
*dimm
)
592 int mtr
, ans
, addrBits
, channel
;
594 channel
= to_channel(ch
, branch
);
596 mtr
= pvt
->mtr
[slot
][branch
];
597 ans
= MTR_DIMMS_PRESENT(mtr
) ? 1 : 0;
599 edac_dbg(2, "\tMTR%d CH%d: DIMMs are %sPresent (mtr)\n",
600 slot
, channel
, ans
? "" : "NOT ");
602 /* Determine if there is a DIMM present in this DIMM slot */
606 /* Start with the number of bits for a Bank
608 addrBits
= MTR_DRAM_BANKS_ADDR_BITS
;
609 /* Add the number of ROW bits */
610 addrBits
+= MTR_DIMM_ROWS_ADDR_BITS(mtr
);
611 /* add the number of COLUMN bits */
612 addrBits
+= MTR_DIMM_COLS_ADDR_BITS(mtr
);
613 /* add the number of RANK bits */
614 addrBits
+= MTR_DIMM_RANKS(mtr
);
616 addrBits
+= 6; /* add 64 bits per DIMM */
617 addrBits
-= 20; /* divide by 2^^20 */
618 addrBits
-= 3; /* 8 bits per bytes */
620 dinfo
->megabytes
= 1 << addrBits
;
622 edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr
));
624 edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
625 MTR_DIMMS_ETHROTTLE(mtr
) ? "enabled" : "disabled");
627 edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr
));
628 edac_dbg(2, "\t\tNUMRANK: %s\n",
629 MTR_DIMM_RANKS(mtr
) ? "double" : "single");
630 edac_dbg(2, "\t\tNUMROW: %s\n",
631 MTR_DIMM_ROWS(mtr
) == 0 ? "8,192 - 13 rows" :
632 MTR_DIMM_ROWS(mtr
) == 1 ? "16,384 - 14 rows" :
633 MTR_DIMM_ROWS(mtr
) == 2 ? "32,768 - 15 rows" :
635 edac_dbg(2, "\t\tNUMCOL: %s\n",
636 MTR_DIMM_COLS(mtr
) == 0 ? "1,024 - 10 columns" :
637 MTR_DIMM_COLS(mtr
) == 1 ? "2,048 - 11 columns" :
638 MTR_DIMM_COLS(mtr
) == 2 ? "4,096 - 12 columns" :
640 edac_dbg(2, "\t\tSIZE: %d MB\n", dinfo
->megabytes
);
643 * The type of error detection actually depends of the
644 * mode of operation. When it is just one single memory chip, at
645 * socket 0, channel 0, it uses 8-byte-over-32-byte SECDED+ code.
646 * In normal or mirrored mode, it uses Lockstep mode,
647 * with the possibility of using an extended algorithm for x8 memories
648 * See datasheet Sections 7.3.6 to 7.3.8
651 dimm
->nr_pages
= MiB_TO_PAGES(dinfo
->megabytes
);
653 dimm
->mtype
= MEM_FB_DDR2
;
654 if (IS_SINGLE_MODE(pvt
->mc_settings_a
)) {
655 dimm
->edac_mode
= EDAC_SECDED
;
656 edac_dbg(2, "\t\tECC code is 8-byte-over-32-byte SECDED+ code\n");
658 edac_dbg(2, "\t\tECC code is on Lockstep mode\n");
659 if (MTR_DRAM_WIDTH(mtr
) == 8)
660 dimm
->edac_mode
= EDAC_S8ECD8ED
;
662 dimm
->edac_mode
= EDAC_S4ECD4ED
;
665 /* ask what device type on this row */
666 if (MTR_DRAM_WIDTH(mtr
) == 8) {
667 edac_dbg(2, "\t\tScrub algorithm for x8 is on %s mode\n",
668 IS_SCRBALGO_ENHANCED(pvt
->mc_settings
) ?
669 "enhanced" : "normal");
671 dimm
->dtype
= DEV_X8
;
673 dimm
->dtype
= DEV_X4
;
679 * print_dimm_size() - Prints dump of the memory organization
680 * @pvt: pointer to the private data struct used by i7300 driver
682 * Useful for debug. If debug is disabled, this routine do nothing
684 static void print_dimm_size(struct i7300_pvt
*pvt
)
686 #ifdef CONFIG_EDAC_DEBUG
687 struct i7300_dimm_info
*dinfo
;
693 p
= pvt
->tmp_prt_buffer
;
695 n
= snprintf(p
, space
, " ");
698 for (channel
= 0; channel
< MAX_CHANNELS
; channel
++) {
699 n
= snprintf(p
, space
, "channel %d | ", channel
);
703 edac_dbg(2, "%s\n", pvt
->tmp_prt_buffer
);
704 p
= pvt
->tmp_prt_buffer
;
706 n
= snprintf(p
, space
, "-------------------------------"
707 "------------------------------");
710 edac_dbg(2, "%s\n", pvt
->tmp_prt_buffer
);
711 p
= pvt
->tmp_prt_buffer
;
714 for (slot
= 0; slot
< MAX_SLOTS
; slot
++) {
715 n
= snprintf(p
, space
, "csrow/SLOT %d ", slot
);
719 for (channel
= 0; channel
< MAX_CHANNELS
; channel
++) {
720 dinfo
= &pvt
->dimm_info
[slot
][channel
];
721 n
= snprintf(p
, space
, "%4d MB | ", dinfo
->megabytes
);
726 edac_dbg(2, "%s\n", pvt
->tmp_prt_buffer
);
727 p
= pvt
->tmp_prt_buffer
;
731 n
= snprintf(p
, space
, "-------------------------------"
732 "------------------------------");
735 edac_dbg(2, "%s\n", pvt
->tmp_prt_buffer
);
736 p
= pvt
->tmp_prt_buffer
;
742 * i7300_init_csrows() - Initialize the 'csrows' table within
743 * the mci control structure with the
744 * addressing of memory.
745 * @mci: struct mem_ctl_info pointer
747 static int i7300_init_csrows(struct mem_ctl_info
*mci
)
749 struct i7300_pvt
*pvt
;
750 struct i7300_dimm_info
*dinfo
;
753 int ch
, branch
, slot
, channel
, max_channel
, max_branch
;
754 struct dimm_info
*dimm
;
758 edac_dbg(2, "Memory Technology Registers:\n");
760 if (IS_SINGLE_MODE(pvt
->mc_settings_a
)) {
764 max_branch
= MAX_BRANCHES
;
765 max_channel
= MAX_CH_PER_BRANCH
;
768 /* Get the AMB present registers for the four channels */
769 for (branch
= 0; branch
< max_branch
; branch
++) {
770 /* Read and dump branch 0's MTRs */
771 channel
= to_channel(0, branch
);
772 pci_read_config_word(pvt
->pci_dev_2x_0_fbd_branch
[branch
],
774 &pvt
->ambpresent
[channel
]);
775 edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n",
776 channel
, pvt
->ambpresent
[channel
]);
778 if (max_channel
== 1)
781 channel
= to_channel(1, branch
);
782 pci_read_config_word(pvt
->pci_dev_2x_0_fbd_branch
[branch
],
784 &pvt
->ambpresent
[channel
]);
785 edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n",
786 channel
, pvt
->ambpresent
[channel
]);
789 /* Get the set of MTR[0-7] regs by each branch */
790 for (slot
= 0; slot
< MAX_SLOTS
; slot
++) {
791 int where
= mtr_regs
[slot
];
792 for (branch
= 0; branch
< max_branch
; branch
++) {
793 pci_read_config_word(pvt
->pci_dev_2x_0_fbd_branch
[branch
],
795 &pvt
->mtr
[slot
][branch
]);
796 for (ch
= 0; ch
< max_channel
; ch
++) {
797 int channel
= to_channel(ch
, branch
);
799 dimm
= EDAC_DIMM_PTR(mci
->layers
, mci
->dimms
,
800 mci
->n_layers
, branch
, ch
, slot
);
802 dinfo
= &pvt
->dimm_info
[slot
][channel
];
804 mtr
= decode_mtr(pvt
, slot
, ch
, branch
,
807 /* if no DIMMS on this row, continue */
808 if (!MTR_DIMMS_PRESENT(mtr
))
821 * decode_mir() - Decodes Memory Interleave Register (MIR) info
822 * @mir_no: number of the MIR register to decode
823 * @mir: array with the MIR data cached on the driver
825 static void decode_mir(int mir_no
, u16 mir
[MAX_MIR
])
828 edac_dbg(2, "MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n",
830 (mir
[mir_no
] >> 4) & 0xfff,
831 (mir
[mir_no
] & 1) ? "B0" : "",
832 (mir
[mir_no
] & 2) ? "B1" : "");
836 * i7300_get_mc_regs() - Get the contents of the MC enumeration registers
837 * @mci: struct mem_ctl_info pointer
839 * Data read is cached internally for its usage when needed
841 static int i7300_get_mc_regs(struct mem_ctl_info
*mci
)
843 struct i7300_pvt
*pvt
;
849 pci_read_config_dword(pvt
->pci_dev_16_0_fsb_ctlr
, AMBASE
,
850 (u32
*) &pvt
->ambase
);
852 edac_dbg(2, "AMBASE= 0x%lx\n", (long unsigned int)pvt
->ambase
);
854 /* Get the Branch Map regs */
855 pci_read_config_word(pvt
->pci_dev_16_1_fsb_addr_map
, TOLM
, &pvt
->tolm
);
857 edac_dbg(2, "TOLM (number of 256M regions) =%u (0x%x)\n",
858 pvt
->tolm
, pvt
->tolm
);
860 actual_tolm
= (u32
) ((1000l * pvt
->tolm
) >> (30 - 28));
861 edac_dbg(2, "Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
862 actual_tolm
/1000, actual_tolm
% 1000, pvt
->tolm
<< 28);
864 /* Get memory controller settings */
865 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
, MC_SETTINGS
,
867 pci_read_config_dword(pvt
->pci_dev_16_1_fsb_addr_map
, MC_SETTINGS_A
,
868 &pvt
->mc_settings_a
);
870 if (IS_SINGLE_MODE(pvt
->mc_settings_a
))
871 edac_dbg(0, "Memory controller operating on single mode\n");
873 edac_dbg(0, "Memory controller operating on %smirrored mode\n",
874 IS_MIRRORED(pvt
->mc_settings
) ? "" : "non-");
876 edac_dbg(0, "Error detection is %s\n",
877 IS_ECC_ENABLED(pvt
->mc_settings
) ? "enabled" : "disabled");
878 edac_dbg(0, "Retry is %s\n",
879 IS_RETRY_ENABLED(pvt
->mc_settings
) ? "enabled" : "disabled");
881 /* Get Memory Interleave Range registers */
882 pci_read_config_word(pvt
->pci_dev_16_1_fsb_addr_map
, MIR0
,
884 pci_read_config_word(pvt
->pci_dev_16_1_fsb_addr_map
, MIR1
,
886 pci_read_config_word(pvt
->pci_dev_16_1_fsb_addr_map
, MIR2
,
889 /* Decode the MIR regs */
890 for (i
= 0; i
< MAX_MIR
; i
++)
891 decode_mir(i
, pvt
->mir
);
893 rc
= i7300_init_csrows(mci
);
897 /* Go and determine the size of each DIMM and place in an
899 print_dimm_size(pvt
);
904 /*************************************************
905 * i7300 Functions related to device probe/release
906 *************************************************/
909 * i7300_put_devices() - Release the PCI devices
910 * @mci: struct mem_ctl_info pointer
912 static void i7300_put_devices(struct mem_ctl_info
*mci
)
914 struct i7300_pvt
*pvt
;
919 /* Decrement usage count for devices */
920 for (branch
= 0; branch
< MAX_CH_PER_BRANCH
; branch
++)
921 pci_dev_put(pvt
->pci_dev_2x_0_fbd_branch
[branch
]);
922 pci_dev_put(pvt
->pci_dev_16_2_fsb_err_regs
);
923 pci_dev_put(pvt
->pci_dev_16_1_fsb_addr_map
);
927 * i7300_get_devices() - Find and perform 'get' operation on the MCH's
928 * device/functions we want to reference for this driver
929 * @mci: struct mem_ctl_info pointer
931 * Access and prepare the several devices for usage:
932 * I7300 devices used by this driver:
933 * Device 16, functions 0,1 and 2: PCI_DEVICE_ID_INTEL_I7300_MCH_ERR
934 * Device 21 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB0
935 * Device 22 function 0: PCI_DEVICE_ID_INTEL_I7300_MCH_FB1
937 static int i7300_get_devices(struct mem_ctl_info
*mci
)
939 struct i7300_pvt
*pvt
;
940 struct pci_dev
*pdev
;
944 /* Attempt to 'get' the MCH register we want */
946 while ((pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
,
947 PCI_DEVICE_ID_INTEL_I7300_MCH_ERR
,
949 /* Store device 16 funcs 1 and 2 */
950 switch (PCI_FUNC(pdev
->devfn
)) {
952 if (!pvt
->pci_dev_16_1_fsb_addr_map
)
953 pvt
->pci_dev_16_1_fsb_addr_map
=
957 if (!pvt
->pci_dev_16_2_fsb_err_regs
)
958 pvt
->pci_dev_16_2_fsb_err_regs
=
964 if (!pvt
->pci_dev_16_1_fsb_addr_map
||
965 !pvt
->pci_dev_16_2_fsb_err_regs
) {
966 /* At least one device was not found */
967 i7300_printk(KERN_ERR
,
968 "'system address,Process Bus' device not found:"
969 "vendor 0x%x device 0x%x ERR funcs (broken BIOS?)\n",
971 PCI_DEVICE_ID_INTEL_I7300_MCH_ERR
);
975 edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n",
976 pci_name(pvt
->pci_dev_16_0_fsb_ctlr
),
977 pvt
->pci_dev_16_0_fsb_ctlr
->vendor
,
978 pvt
->pci_dev_16_0_fsb_ctlr
->device
);
979 edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
980 pci_name(pvt
->pci_dev_16_1_fsb_addr_map
),
981 pvt
->pci_dev_16_1_fsb_addr_map
->vendor
,
982 pvt
->pci_dev_16_1_fsb_addr_map
->device
);
983 edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n",
984 pci_name(pvt
->pci_dev_16_2_fsb_err_regs
),
985 pvt
->pci_dev_16_2_fsb_err_regs
->vendor
,
986 pvt
->pci_dev_16_2_fsb_err_regs
->device
);
988 pvt
->pci_dev_2x_0_fbd_branch
[0] = pci_get_device(PCI_VENDOR_ID_INTEL
,
989 PCI_DEVICE_ID_INTEL_I7300_MCH_FB0
,
991 if (!pvt
->pci_dev_2x_0_fbd_branch
[0]) {
992 i7300_printk(KERN_ERR
,
993 "MC: 'BRANCH 0' device not found:"
994 "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
995 PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0
);
999 pvt
->pci_dev_2x_0_fbd_branch
[1] = pci_get_device(PCI_VENDOR_ID_INTEL
,
1000 PCI_DEVICE_ID_INTEL_I7300_MCH_FB1
,
1002 if (!pvt
->pci_dev_2x_0_fbd_branch
[1]) {
1003 i7300_printk(KERN_ERR
,
1004 "MC: 'BRANCH 1' device not found:"
1005 "vendor 0x%x device 0x%x Func 0 "
1007 PCI_VENDOR_ID_INTEL
,
1008 PCI_DEVICE_ID_INTEL_I7300_MCH_FB1
);
1015 i7300_put_devices(mci
);
1020 * i7300_init_one() - Probe for one instance of the device
1021 * @pdev: struct pci_dev pointer
1022 * @id: struct pci_device_id pointer - currently unused
1024 static int i7300_init_one(struct pci_dev
*pdev
, const struct pci_device_id
*id
)
1026 struct mem_ctl_info
*mci
;
1027 struct edac_mc_layer layers
[3];
1028 struct i7300_pvt
*pvt
;
1031 /* wake up device */
1032 rc
= pci_enable_device(pdev
);
1036 edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n",
1038 PCI_SLOT(pdev
->devfn
), PCI_FUNC(pdev
->devfn
));
1040 /* We only are looking for func 0 of the set */
1041 if (PCI_FUNC(pdev
->devfn
) != 0)
1044 /* allocate a new MC control structure */
1045 layers
[0].type
= EDAC_MC_LAYER_BRANCH
;
1046 layers
[0].size
= MAX_BRANCHES
;
1047 layers
[0].is_virt_csrow
= false;
1048 layers
[1].type
= EDAC_MC_LAYER_CHANNEL
;
1049 layers
[1].size
= MAX_CH_PER_BRANCH
;
1050 layers
[1].is_virt_csrow
= true;
1051 layers
[2].type
= EDAC_MC_LAYER_SLOT
;
1052 layers
[2].size
= MAX_SLOTS
;
1053 layers
[2].is_virt_csrow
= true;
1054 mci
= edac_mc_alloc(0, ARRAY_SIZE(layers
), layers
, sizeof(*pvt
));
1058 edac_dbg(0, "MC: mci = %p\n", mci
);
1060 mci
->pdev
= &pdev
->dev
; /* record ptr to the generic device */
1062 pvt
= mci
->pvt_info
;
1063 pvt
->pci_dev_16_0_fsb_ctlr
= pdev
; /* Record this device in our private */
1065 pvt
->tmp_prt_buffer
= kmalloc(PAGE_SIZE
, GFP_KERNEL
);
1066 if (!pvt
->tmp_prt_buffer
) {
1071 /* 'get' the pci devices we want to reserve for our use */
1072 if (i7300_get_devices(mci
))
1076 mci
->mtype_cap
= MEM_FLAG_FB_DDR2
;
1077 mci
->edac_ctl_cap
= EDAC_FLAG_NONE
;
1078 mci
->edac_cap
= EDAC_FLAG_NONE
;
1079 mci
->mod_name
= "i7300_edac.c";
1080 mci
->mod_ver
= I7300_REVISION
;
1081 mci
->ctl_name
= i7300_devs
[0].ctl_name
;
1082 mci
->dev_name
= pci_name(pdev
);
1083 mci
->ctl_page_to_phys
= NULL
;
1085 /* Set the function pointer to an actual operation function */
1086 mci
->edac_check
= i7300_check_error
;
1088 /* initialize the MC control structure 'csrows' table
1089 * with the mapping and control information */
1090 if (i7300_get_mc_regs(mci
)) {
1091 edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i7300_init_csrows() returned nonzero value\n");
1092 mci
->edac_cap
= EDAC_FLAG_NONE
; /* no csrows found */
1094 edac_dbg(1, "MC: Enable error reporting now\n");
1095 i7300_enable_error_reporting(mci
);
1098 /* add this new MC control structure to EDAC's list of MCs */
1099 if (edac_mc_add_mc(mci
)) {
1100 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
1101 /* FIXME: perhaps some code should go here that disables error
1102 * reporting if we just enabled it
1107 i7300_clear_error(mci
);
1109 /* allocating generic PCI control info */
1110 i7300_pci
= edac_pci_create_generic_ctl(&pdev
->dev
, EDAC_MOD_STR
);
1113 "%s(): Unable to create PCI control\n",
1116 "%s(): PCI error report via EDAC not setup\n",
1122 /* Error exit unwinding stack */
1125 i7300_put_devices(mci
);
1128 kfree(pvt
->tmp_prt_buffer
);
1134 * i7300_remove_one() - Remove the driver
1135 * @pdev: struct pci_dev pointer
1137 static void i7300_remove_one(struct pci_dev
*pdev
)
1139 struct mem_ctl_info
*mci
;
1145 edac_pci_release_generic_ctl(i7300_pci
);
1147 mci
= edac_mc_del_mc(&pdev
->dev
);
1151 tmp
= ((struct i7300_pvt
*)mci
->pvt_info
)->tmp_prt_buffer
;
1153 /* retrieve references to resources, and free those resources */
1154 i7300_put_devices(mci
);
1161 * pci_device_id: table for which devices we are looking for
1163 * Has only 8086:360c PCI ID
1165 static const struct pci_device_id i7300_pci_tbl
[] = {
1166 {PCI_DEVICE(PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR
)},
1167 {0,} /* 0 terminated list. */
1170 MODULE_DEVICE_TABLE(pci
, i7300_pci_tbl
);
1173 * i7300_driver: pci_driver structure for this module
1175 static struct pci_driver i7300_driver
= {
1176 .name
= "i7300_edac",
1177 .probe
= i7300_init_one
,
1178 .remove
= i7300_remove_one
,
1179 .id_table
= i7300_pci_tbl
,
1183 * i7300_init() - Registers the driver
1185 static int __init
i7300_init(void)
1191 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1194 pci_rc
= pci_register_driver(&i7300_driver
);
1196 return (pci_rc
< 0) ? pci_rc
: 0;
1200 * i7300_exit() - Unregisters the driver
1202 static void __exit
i7300_exit(void)
1205 pci_unregister_driver(&i7300_driver
);
1208 module_init(i7300_init
);
1209 module_exit(i7300_exit
);
1211 MODULE_LICENSE("GPL");
1212 MODULE_AUTHOR("Mauro Carvalho Chehab");
1213 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1214 MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - "
1217 module_param(edac_op_state
, int, 0444);
1218 MODULE_PARM_DESC(edac_op_state
, "EDAC Error Reporting state: 0=Poll,1=NMI");