EDAC: i7core, sb_edac: Don't return NOTIFY_BAD from mce_decoder callback
[linux/fpc-iii.git] / drivers / edac / xgene_edac.c
blobbf19b6e3bd129929372ba486450b822c098169b4
1 /*
2 * APM X-Gene SoC EDAC (error detection and correction)
4 * Copyright (c) 2015, Applied Micro Circuits Corporation
5 * Author: Feng Kan <fkan@apm.com>
6 * Loc Ho <lho@apm.com>
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include <linux/ctype.h>
23 #include <linux/edac.h>
24 #include <linux/interrupt.h>
25 #include <linux/mfd/syscon.h>
26 #include <linux/module.h>
27 #include <linux/of.h>
28 #include <linux/of_address.h>
29 #include <linux/regmap.h>
31 #include "edac_core.h"
32 #include "edac_module.h"
/* Module name reported through the EDAC core (mod_name) */
#define EDAC_MOD_STR			"xgene_edac"

/*
 * Global error configuration status registers (CSR).
 *
 * Register offsets are added to xgene_edac.pcp_csr. The bit definitions
 * that follow a status/mask register pair apply to both registers of
 * that pair.
 */
#define PCPHPERRINTSTS			0x0000
#define PCPHPERRINTMSK			0x0004
#define  MCU_CTL_ERR_MASK		BIT(12)
#define  IOB_PA_ERR_MASK		BIT(11)
#define  IOB_BA_ERR_MASK		BIT(10)
#define  IOB_XGIC_ERR_MASK		BIT(9)
#define  IOB_RB_ERR_MASK		BIT(8)
#define  L3C_UNCORR_ERR_MASK		BIT(5)
#define  MCU_UNCORR_ERR_MASK		BIT(4)
#define  PMD3_MERR_MASK			BIT(3)
#define  PMD2_MERR_MASK			BIT(2)
#define  PMD1_MERR_MASK			BIT(1)
#define  PMD0_MERR_MASK			BIT(0)
#define PCPLPERRINTSTS			0x0008
#define PCPLPERRINTMSK			0x000C
#define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
#define  L3C_CORR_ERR_MASK		BIT(1)
#define  MCU_CORR_ERR_MASK		BIT(0)
#define MEMERRINTSTS			0x0010
#define MEMERRINTMSK			0x0014
58 struct xgene_edac {
59 struct device *dev;
60 struct regmap *csw_map;
61 struct regmap *mcba_map;
62 struct regmap *mcbb_map;
63 struct regmap *efuse_map;
64 struct regmap *rb_map;
65 void __iomem *pcp_csr;
66 spinlock_t lock;
67 struct dentry *dfs;
69 struct list_head mcus;
70 struct list_head pmds;
71 struct list_head l3s;
72 struct list_head socs;
74 struct mutex mc_lock;
75 int mc_active_mask;
76 int mc_registered_mask;
79 static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
81 *val = readl(edac->pcp_csr + reg);
84 static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
85 u32 bits_mask)
87 u32 val;
89 spin_lock(&edac->lock);
90 val = readl(edac->pcp_csr + reg);
91 val &= ~bits_mask;
92 writel(val, edac->pcp_csr + reg);
93 spin_unlock(&edac->lock);
96 static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
97 u32 bits_mask)
99 u32 val;
101 spin_lock(&edac->lock);
102 val = readl(edac->pcp_csr + reg);
103 val |= bits_mask;
104 writel(val, edac->pcp_csr + reg);
105 spin_unlock(&edac->lock);
/*
 * Memory controller error CSR.
 *
 * The per-rank registers (names ending in 0) are replicated for each of
 * the MCU_MAX_RANK ranks, MCU_RANK_STRIDE bytes apart.
 */
#define MCU_MAX_RANK			8
#define MCU_RANK_STRIDE			0x40

#define MCUGECR				0x0110
#define  MCU_GECR_DEMANDUCINTREN_MASK	BIT(0)
#define  MCU_GECR_BACKUCINTREN_MASK	BIT(1)
#define  MCU_GECR_CINTREN_MASK		BIT(2)
/* NOTE: "MUC" spelling is kept; other code in this file uses this name. */
#define  MUC_GECR_MCUADDRERREN_MASK	BIT(9)
#define MCUGESR				0x0114
#define  MCU_GESR_ADDRNOMATCH_ERR_MASK	BIT(7)
#define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
#define  MCU_GESR_PHYP_ERR_MASK		BIT(3)
#define MCUESRR0			0x0314
#define  MCU_ESRR_MULTUCERR_MASK	BIT(3)
#define  MCU_ESRR_BACKUCERR_MASK	BIT(2)
#define  MCU_ESRR_DEMANDUCERR_MASK	BIT(1)
#define  MCU_ESRR_CERR_MASK		BIT(0)
#define MCUESRRA0			0x0318
#define MCUEBLRR0			0x031c
#define  MCU_EBLRR_ERRBANK_RD(src)	(((src) & 0x00000007) >> 0)
#define MCUERCRR0			0x0320
#define  MCU_ERCRR_ERRROW_RD(src)	(((src) & 0xFFFF0000) >> 16)
#define  MCU_ERCRR_ERRCOL_RD(src)	((src) & 0x00000FFF)
#define MCUSBECNT0			0x0324
#define  MCU_SBECNT_COUNT(src)		((src) & 0xFFFF)

/* Register in the regmap referenced as xgene_edac.csw_map */
#define CSW_CSWCR			0x0000
#define  CSW_CSWCR_DUALMCB_MASK		BIT(0)

/* Register in the regmaps referenced as xgene_edac.mcba_map / mcbb_map */
#define MCBADDRMR			0x0000
#define  MCBADDRMR_MCU_INTLV_MODE_MASK	BIT(3)
#define  MCBADDRMR_DUALMCU_MODE_MASK	BIT(2)
#define  MCBADDRMR_MCB_INTLV_MODE_MASK	BIT(1)
#define  MCBADDRMR_ADDRESS_MODE_MASK	BIT(0)
144 struct xgene_edac_mc_ctx {
145 struct list_head next;
146 char *name;
147 struct mem_ctl_info *mci;
148 struct xgene_edac *edac;
149 void __iomem *mcu_csr;
150 u32 mcu_id;
153 static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
154 const char __user *data,
155 size_t count, loff_t *ppos)
157 struct mem_ctl_info *mci = file->private_data;
158 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
159 int i;
161 for (i = 0; i < MCU_MAX_RANK; i++) {
162 writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
163 MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
164 ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
166 return count;
169 static const struct file_operations xgene_edac_mc_debug_inject_fops = {
170 .open = simple_open,
171 .write = xgene_edac_mc_err_inject_write,
172 .llseek = generic_file_llseek,
175 static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
177 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
178 return;
180 if (!mci->debugfs)
181 return;
183 edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
184 &xgene_edac_mc_debug_inject_fops);
187 static void xgene_edac_mc_check(struct mem_ctl_info *mci)
189 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
190 unsigned int pcp_hp_stat;
191 unsigned int pcp_lp_stat;
192 u32 reg;
193 u32 rank;
194 u32 bank;
195 u32 count;
196 u32 col_row;
198 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
199 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
200 if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
201 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
202 (MCU_CORR_ERR_MASK & pcp_lp_stat)))
203 return;
205 for (rank = 0; rank < MCU_MAX_RANK; rank++) {
206 reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
208 /* Detect uncorrectable memory error */
209 if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
210 MCU_ESRR_BACKUCERR_MASK)) {
211 /* Detected uncorrectable memory error */
212 edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
213 "MCU uncorrectable error at rank %d\n", rank);
215 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
216 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
219 /* Detect correctable memory error */
220 if (reg & MCU_ESRR_CERR_MASK) {
221 bank = readl(ctx->mcu_csr + MCUEBLRR0 +
222 rank * MCU_RANK_STRIDE);
223 col_row = readl(ctx->mcu_csr + MCUERCRR0 +
224 rank * MCU_RANK_STRIDE);
225 count = readl(ctx->mcu_csr + MCUSBECNT0 +
226 rank * MCU_RANK_STRIDE);
227 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
228 "MCU correctable error at rank %d bank %d column %d row %d count %d\n",
229 rank, MCU_EBLRR_ERRBANK_RD(bank),
230 MCU_ERCRR_ERRCOL_RD(col_row),
231 MCU_ERCRR_ERRROW_RD(col_row),
232 MCU_SBECNT_COUNT(count));
234 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
235 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
238 /* Clear all error registers */
239 writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
240 writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
241 writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
242 rank * MCU_RANK_STRIDE);
243 writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
246 /* Detect memory controller error */
247 reg = readl(ctx->mcu_csr + MCUGESR);
248 if (reg) {
249 if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
250 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
251 "MCU address miss-match error\n");
252 if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
253 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
254 "MCU address multi-match error\n");
256 writel(reg, ctx->mcu_csr + MCUGESR);
260 static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
262 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
263 unsigned int val;
265 if (edac_op_state != EDAC_OPSTATE_INT)
266 return;
268 mutex_lock(&ctx->edac->mc_lock);
271 * As there is only single bit for enable error and interrupt mask,
272 * we must only enable top level interrupt after all MCUs are
273 * registered. Otherwise, if there is an error and the corresponding
274 * MCU has not registered, the interrupt will never get cleared. To
275 * determine all MCU have registered, we will keep track of active
276 * MCUs and registered MCUs.
278 if (enable) {
279 /* Set registered MCU bit */
280 ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;
282 /* Enable interrupt after all active MCU registered */
283 if (ctx->edac->mc_registered_mask ==
284 ctx->edac->mc_active_mask) {
285 /* Enable memory controller top level interrupt */
286 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
287 MCU_UNCORR_ERR_MASK |
288 MCU_CTL_ERR_MASK);
289 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
290 MCU_CORR_ERR_MASK);
293 /* Enable MCU interrupt and error reporting */
294 val = readl(ctx->mcu_csr + MCUGECR);
295 val |= MCU_GECR_DEMANDUCINTREN_MASK |
296 MCU_GECR_BACKUCINTREN_MASK |
297 MCU_GECR_CINTREN_MASK |
298 MUC_GECR_MCUADDRERREN_MASK;
299 writel(val, ctx->mcu_csr + MCUGECR);
300 } else {
301 /* Disable MCU interrupt */
302 val = readl(ctx->mcu_csr + MCUGECR);
303 val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
304 MCU_GECR_BACKUCINTREN_MASK |
305 MCU_GECR_CINTREN_MASK |
306 MUC_GECR_MCUADDRERREN_MASK);
307 writel(val, ctx->mcu_csr + MCUGECR);
309 /* Disable memory controller top level interrupt */
310 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
311 MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
312 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
313 MCU_CORR_ERR_MASK);
315 /* Clear registered MCU bit */
316 ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
319 mutex_unlock(&ctx->edac->mc_lock);
322 static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
324 unsigned int reg;
325 u32 mcu_mask;
327 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
328 return 0;
330 if (reg & CSW_CSWCR_DUALMCB_MASK) {
332 * Dual MCB active - Determine if all 4 active or just MCU0
333 * and MCU2 active
335 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
336 return 0;
337 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
338 } else {
340 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
341 * active
343 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
344 return 0;
345 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
348 /* Save active MC mask if hasn't set already */
349 if (!ctx->edac->mc_active_mask)
350 ctx->edac->mc_active_mask = mcu_mask;
352 return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
355 static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
357 struct mem_ctl_info *mci;
358 struct edac_mc_layer layers[2];
359 struct xgene_edac_mc_ctx tmp_ctx;
360 struct xgene_edac_mc_ctx *ctx;
361 struct resource res;
362 int rc;
364 memset(&tmp_ctx, 0, sizeof(tmp_ctx));
365 tmp_ctx.edac = edac;
367 if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
368 return -ENOMEM;
370 rc = of_address_to_resource(np, 0, &res);
371 if (rc < 0) {
372 dev_err(edac->dev, "no MCU resource address\n");
373 goto err_group;
375 tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
376 if (IS_ERR(tmp_ctx.mcu_csr)) {
377 dev_err(edac->dev, "unable to map MCU resource\n");
378 rc = PTR_ERR(tmp_ctx.mcu_csr);
379 goto err_group;
382 /* Ignore non-active MCU */
383 if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
384 dev_err(edac->dev, "no memory-controller property\n");
385 rc = -ENODEV;
386 goto err_group;
388 if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
389 rc = -ENODEV;
390 goto err_group;
393 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
394 layers[0].size = 4;
395 layers[0].is_virt_csrow = true;
396 layers[1].type = EDAC_MC_LAYER_CHANNEL;
397 layers[1].size = 2;
398 layers[1].is_virt_csrow = false;
399 mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
400 sizeof(*ctx));
401 if (!mci) {
402 rc = -ENOMEM;
403 goto err_group;
406 ctx = mci->pvt_info;
407 *ctx = tmp_ctx; /* Copy over resource value */
408 ctx->name = "xgene_edac_mc_err";
409 ctx->mci = mci;
410 mci->pdev = &mci->dev;
411 mci->ctl_name = ctx->name;
412 mci->dev_name = ctx->name;
414 mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
415 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
416 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
417 mci->edac_cap = EDAC_FLAG_SECDED;
418 mci->mod_name = EDAC_MOD_STR;
419 mci->mod_ver = "0.1";
420 mci->ctl_page_to_phys = NULL;
421 mci->scrub_cap = SCRUB_FLAG_HW_SRC;
422 mci->scrub_mode = SCRUB_HW_SRC;
424 if (edac_op_state == EDAC_OPSTATE_POLL)
425 mci->edac_check = xgene_edac_mc_check;
427 if (edac_mc_add_mc(mci)) {
428 dev_err(edac->dev, "edac_mc_add_mc failed\n");
429 rc = -EINVAL;
430 goto err_free;
433 xgene_edac_mc_create_debugfs_node(mci);
435 list_add(&ctx->next, &edac->mcus);
437 xgene_edac_mc_irq_ctl(mci, true);
439 devres_remove_group(edac->dev, xgene_edac_mc_add);
441 dev_info(edac->dev, "X-Gene EDAC MC registered\n");
442 return 0;
444 err_free:
445 edac_mc_free(mci);
446 err_group:
447 devres_release_group(edac->dev, xgene_edac_mc_add);
448 return rc;
451 static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
453 xgene_edac_mc_irq_ctl(mcu->mci, false);
454 edac_mc_del_mc(&mcu->mci->dev);
455 edac_mc_free(mcu->mci);
456 return 0;
/*
 * CPU L1/L2 error CSR.
 *
 * The *_PAGE values are offsets from the PMD register base; the per-CPU
 * page is additionally offset by cpu * CPU_CSR_STRIDE. The
 * *_PAGE_OFFSET values are offsets inside the respective page.
 */
#define MAX_CPU_PER_PMD				2
#define CPU_CSR_STRIDE				0x00100000
#define CPU_L2C_PAGE				0x000D0000
#define CPU_MEMERR_L2C_PAGE			0x000E0000
#define CPU_MEMERR_CPU_PAGE			0x000F0000

#define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
#define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
#define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
#define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
#define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
#define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
#define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
#define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
#define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
#define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
#define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814

#define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
#define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
#define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
#define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
#define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
#define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
#define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
#define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
#define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
#define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
#define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
#define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
#define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
#define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
#define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
#define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
#define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
#define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
#define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804
518 * Processor Module Domain (PMD) context - Context for a pair of processsors.
519 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
520 * its own L1 cache.
522 struct xgene_edac_pmd_ctx {
523 struct list_head next;
524 struct device ddev;
525 char *name;
526 struct xgene_edac *edac;
527 struct edac_device_ctl_info *edac_dev;
528 void __iomem *pmd_csr;
529 u32 pmd;
530 int version;
533 static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
534 int cpu_idx)
536 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
537 void __iomem *pg_f;
538 u32 val;
540 pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;
542 val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
543 if (!val)
544 goto chk_lsu;
545 dev_err(edac_dev->dev,
546 "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
547 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
548 MEMERR_CPU_ICFESR_ERRWAY_RD(val),
549 MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
550 MEMERR_CPU_ICFESR_ERRINFO_RD(val));
551 if (val & MEMERR_CPU_ICFESR_CERR_MASK)
552 dev_err(edac_dev->dev, "One or more correctable error\n");
553 if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
554 dev_err(edac_dev->dev, "Multiple correctable error\n");
555 switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
556 case 1:
557 dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
558 break;
559 case 2:
560 dev_err(edac_dev->dev, "Way select multiple hit\n");
561 break;
562 case 3:
563 dev_err(edac_dev->dev, "Physical tag parity error\n");
564 break;
565 case 4:
566 case 5:
567 dev_err(edac_dev->dev, "L1 data parity error\n");
568 break;
569 case 6:
570 dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
571 break;
574 /* Clear any HW errors */
575 writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
577 if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
578 MEMERR_CPU_ICFESR_MULTCERR_MASK))
579 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
581 chk_lsu:
582 val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
583 if (!val)
584 goto chk_mmu;
585 dev_err(edac_dev->dev,
586 "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
587 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
588 MEMERR_CPU_LSUESR_ERRWAY_RD(val),
589 MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
590 MEMERR_CPU_LSUESR_ERRINFO_RD(val));
591 if (val & MEMERR_CPU_LSUESR_CERR_MASK)
592 dev_err(edac_dev->dev, "One or more correctable error\n");
593 if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
594 dev_err(edac_dev->dev, "Multiple correctable error\n");
595 switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
596 case 0:
597 dev_err(edac_dev->dev, "Load tag error\n");
598 break;
599 case 1:
600 dev_err(edac_dev->dev, "Load data error\n");
601 break;
602 case 2:
603 dev_err(edac_dev->dev, "WSL multihit error\n");
604 break;
605 case 3:
606 dev_err(edac_dev->dev, "Store tag error\n");
607 break;
608 case 4:
609 dev_err(edac_dev->dev,
610 "DTB multihit from load pipeline error\n");
611 break;
612 case 5:
613 dev_err(edac_dev->dev,
614 "DTB multihit from store pipeline error\n");
615 break;
618 /* Clear any HW errors */
619 writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
621 if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
622 MEMERR_CPU_LSUESR_MULTCERR_MASK))
623 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
625 chk_mmu:
626 val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
627 if (!val)
628 return;
629 dev_err(edac_dev->dev,
630 "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
631 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
632 MEMERR_CPU_MMUESR_ERRWAY_RD(val),
633 MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
634 MEMERR_CPU_MMUESR_ERRINFO_RD(val),
635 val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
636 if (val & MEMERR_CPU_MMUESR_CERR_MASK)
637 dev_err(edac_dev->dev, "One or more correctable error\n");
638 if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
639 dev_err(edac_dev->dev, "Multiple correctable error\n");
640 switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
641 case 0:
642 dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
643 break;
644 case 1:
645 dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
646 break;
647 case 2:
648 dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
649 break;
650 case 3:
651 dev_err(edac_dev->dev, "TMO operation single bank error\n");
652 break;
653 case 4:
654 dev_err(edac_dev->dev, "Stage 2 UTB error\n");
655 break;
656 case 5:
657 dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
658 break;
659 case 6:
660 dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
661 break;
662 case 7:
663 dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
664 break;
667 /* Clear any HW errors */
668 writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
670 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
673 static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
675 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
676 void __iomem *pg_d;
677 void __iomem *pg_e;
678 u32 val_hi;
679 u32 val_lo;
680 u32 val;
682 /* Check L2 */
683 pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
684 val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
685 if (!val)
686 goto chk_l2c;
687 val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
688 val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
689 dev_err(edac_dev->dev,
690 "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
691 ctx->pmd, val, val_hi, val_lo);
692 dev_err(edac_dev->dev,
693 "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
694 MEMERR_L2C_L2ESR_ERRSYN_RD(val),
695 MEMERR_L2C_L2ESR_ERRWAY_RD(val),
696 MEMERR_L2C_L2ESR_ERRCPU_RD(val),
697 MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
698 MEMERR_L2C_L2ESR_ERRACTION_RD(val));
700 if (val & MEMERR_L2C_L2ESR_ERR_MASK)
701 dev_err(edac_dev->dev, "One or more correctable error\n");
702 if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
703 dev_err(edac_dev->dev, "Multiple correctable error\n");
704 if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
705 dev_err(edac_dev->dev, "One or more uncorrectable error\n");
706 if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
707 dev_err(edac_dev->dev, "Multiple uncorrectable error\n");
709 switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
710 case 0:
711 dev_err(edac_dev->dev, "Outbound SDB parity error\n");
712 break;
713 case 1:
714 dev_err(edac_dev->dev, "Inbound SDB parity error\n");
715 break;
716 case 2:
717 dev_err(edac_dev->dev, "Tag ECC error\n");
718 break;
719 case 3:
720 dev_err(edac_dev->dev, "Data ECC error\n");
721 break;
724 /* Clear any HW errors */
725 writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
727 if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
728 MEMERR_L2C_L2ESR_MULTICERR_MASK))
729 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
730 if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
731 MEMERR_L2C_L2ESR_MULTUCERR_MASK))
732 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
734 chk_l2c:
735 /* Check if any memory request timed out on L2 cache */
736 pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
737 val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
738 if (val) {
739 val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
740 val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
741 dev_err(edac_dev->dev,
742 "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
743 ctx->pmd, val, val_hi, val_lo);
744 writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
748 static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
750 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
751 unsigned int pcp_hp_stat;
752 int i;
754 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
755 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
756 return;
758 /* Check CPU L1 error */
759 for (i = 0; i < MAX_CPU_PER_PMD; i++)
760 xgene_edac_pmd_l1_check(edac_dev, i);
762 /* Check CPU L2 error */
763 xgene_edac_pmd_l2_check(edac_dev);
766 static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
767 int cpu)
769 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
770 void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
771 CPU_MEMERR_CPU_PAGE;
774 * Enable CPU memory error:
775 * MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
777 writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
778 writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
779 writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
782 static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
784 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
785 void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
786 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
788 /* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
789 writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
790 /* Configure L2C HW request time out feature if supported */
791 if (ctx->version > 1)
792 writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
795 static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
796 bool enable)
798 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
799 int i;
801 /* Enable PMD error interrupt */
802 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
803 if (enable)
804 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
805 PMD0_MERR_MASK << ctx->pmd);
806 else
807 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
808 PMD0_MERR_MASK << ctx->pmd);
811 if (enable) {
812 xgene_edac_pmd_hw_cfg(edac_dev);
814 /* Two CPUs per a PMD */
815 for (i = 0; i < MAX_CPU_PER_PMD; i++)
816 xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
820 static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
821 const char __user *data,
822 size_t count, loff_t *ppos)
824 struct edac_device_ctl_info *edac_dev = file->private_data;
825 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
826 void __iomem *cpux_pg_f;
827 int i;
829 for (i = 0; i < MAX_CPU_PER_PMD; i++) {
830 cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
831 CPU_MEMERR_CPU_PAGE;
833 writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
834 MEMERR_CPU_ICFESR_CERR_MASK,
835 cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
836 writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
837 MEMERR_CPU_LSUESR_CERR_MASK,
838 cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
839 writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
840 MEMERR_CPU_MMUESR_CERR_MASK,
841 cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
843 return count;
846 static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
847 const char __user *data,
848 size_t count, loff_t *ppos)
850 struct edac_device_ctl_info *edac_dev = file->private_data;
851 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
852 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
854 writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
855 MEMERR_L2C_L2ESR_MULTICERR_MASK |
856 MEMERR_L2C_L2ESR_UCERR_MASK |
857 MEMERR_L2C_L2ESR_ERR_MASK,
858 pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
859 return count;
862 static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
864 .open = simple_open,
865 .write = xgene_edac_pmd_l1_inject_ctrl_write,
866 .llseek = generic_file_llseek, },
868 .open = simple_open,
869 .write = xgene_edac_pmd_l2_inject_ctrl_write,
870 .llseek = generic_file_llseek, },
874 static void
875 xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
877 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
878 struct dentry *dbgfs_dir;
879 char name[10];
881 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
882 return;
884 snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
885 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
886 if (!dbgfs_dir)
887 return;
889 edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
890 &xgene_edac_pmd_debug_inject_fops[0]);
891 edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
892 &xgene_edac_pmd_debug_inject_fops[1]);
895 static int xgene_edac_pmd_available(u32 efuse, int pmd)
897 return (efuse & (1 << pmd)) ? 0 : 1;
900 static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
901 int version)
903 struct edac_device_ctl_info *edac_dev;
904 struct xgene_edac_pmd_ctx *ctx;
905 struct resource res;
906 char edac_name[10];
907 u32 pmd;
908 int rc;
909 u32 val;
911 if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
912 return -ENOMEM;
914 /* Determine if this PMD is disabled */
915 if (of_property_read_u32(np, "pmd-controller", &pmd)) {
916 dev_err(edac->dev, "no pmd-controller property\n");
917 rc = -ENODEV;
918 goto err_group;
920 rc = regmap_read(edac->efuse_map, 0, &val);
921 if (rc)
922 goto err_group;
923 if (!xgene_edac_pmd_available(val, pmd)) {
924 rc = -ENODEV;
925 goto err_group;
928 snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
929 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
930 edac_name, 1, "l2c", 1, 2, NULL,
931 0, edac_device_alloc_index());
932 if (!edac_dev) {
933 rc = -ENOMEM;
934 goto err_group;
937 ctx = edac_dev->pvt_info;
938 ctx->name = "xgene_pmd_err";
939 ctx->pmd = pmd;
940 ctx->edac = edac;
941 ctx->edac_dev = edac_dev;
942 ctx->ddev = *edac->dev;
943 ctx->version = version;
944 edac_dev->dev = &ctx->ddev;
945 edac_dev->ctl_name = ctx->name;
946 edac_dev->dev_name = ctx->name;
947 edac_dev->mod_name = EDAC_MOD_STR;
949 rc = of_address_to_resource(np, 0, &res);
950 if (rc < 0) {
951 dev_err(edac->dev, "no PMD resource address\n");
952 goto err_free;
954 ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
955 if (IS_ERR(ctx->pmd_csr)) {
956 dev_err(edac->dev,
957 "devm_ioremap_resource failed for PMD resource address\n");
958 rc = PTR_ERR(ctx->pmd_csr);
959 goto err_free;
962 if (edac_op_state == EDAC_OPSTATE_POLL)
963 edac_dev->edac_check = xgene_edac_pmd_check;
965 xgene_edac_pmd_create_debugfs_nodes(edac_dev);
967 rc = edac_device_add_device(edac_dev);
968 if (rc > 0) {
969 dev_err(edac->dev, "edac_device_add_device failed\n");
970 rc = -ENOMEM;
971 goto err_free;
974 if (edac_op_state == EDAC_OPSTATE_INT)
975 edac_dev->op_state = OP_RUNNING_INTERRUPT;
977 list_add(&ctx->next, &edac->pmds);
979 xgene_edac_pmd_hw_ctl(edac_dev, 1);
981 devres_remove_group(edac->dev, xgene_edac_pmd_add);
983 dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
984 return 0;
986 err_free:
987 edac_device_free_ctl_info(edac_dev);
988 err_group:
989 devres_release_group(edac->dev, xgene_edac_pmd_add);
990 return rc;
993 static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
995 struct edac_device_ctl_info *edac_dev = pmd->edac_dev;
997 xgene_edac_pmd_hw_ctl(edac_dev, 0);
998 edac_device_del_device(edac_dev->dev);
999 edac_device_free_ctl_info(edac_dev);
1000 return 0;
/*
 * L3 Error device.
 *
 * Register offsets are byte offsets (register index * 4). The field
 * extraction macros parenthesise their argument so that an expression
 * such as "a | b" is masked as a whole.
 */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)
#define L3C_ELR				(0x0C * 4)
#define  L3C_ELR_ERRSYN(src)		(((src) & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		(((src) & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		(((src) & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		(((src) & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		(((src) & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		((src) & 0x0000000F)
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		((src) & 0x0000000F)
1030 struct xgene_edac_dev_ctx {
1031 struct list_head next;
1032 struct device ddev;
1033 char *name;
1034 struct xgene_edac *edac;
1035 struct edac_device_ctl_info *edac_dev;
1036 int edac_idx;
1037 void __iomem *dev_csr;
1038 int version;
1042 * Version 1 of the L3 controller has broken single bit correctable logic for
1043 * certain error syndromes. Log them as uncorrectable in that case.
1045 static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1047 if (l3cesr & L3C_ESR_DATATAG_MASK) {
1048 switch (L3C_ELR_ERRSYN(l3celr)) {
1049 case 0x13C:
1050 case 0x0B4:
1051 case 0x007:
1052 case 0x00D:
1053 case 0x00E:
1054 case 0x019:
1055 case 0x01A:
1056 case 0x01C:
1057 case 0x04E:
1058 case 0x041:
1059 return true;
1061 } else if (L3C_ELR_ERRWAY(l3celr) == 9)
1062 return true;
1064 return false;
1067 static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
1069 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1070 u32 l3cesr;
1071 u32 l3celr;
1072 u32 l3caelr;
1073 u32 l3cbelr;
1075 l3cesr = readl(ctx->dev_csr + L3C_ESR);
1076 if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
1077 return;
1079 if (l3cesr & L3C_ESR_UCERR_MASK)
1080 dev_err(edac_dev->dev, "L3C uncorrectable error\n");
1081 if (l3cesr & L3C_ESR_CERR_MASK)
1082 dev_warn(edac_dev->dev, "L3C correctable error\n");
1084 l3celr = readl(ctx->dev_csr + L3C_ELR);
1085 l3caelr = readl(ctx->dev_csr + L3C_AELR);
1086 l3cbelr = readl(ctx->dev_csr + L3C_BELR);
1087 if (l3cesr & L3C_ESR_MULTIHIT_MASK)
1088 dev_err(edac_dev->dev, "L3C multiple hit error\n");
1089 if (l3cesr & L3C_ESR_UCEVICT_MASK)
1090 dev_err(edac_dev->dev,
1091 "L3C dropped eviction of line with error\n");
1092 if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
1093 dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
1094 if (l3cesr & L3C_ESR_DATATAG_MASK)
1095 dev_err(edac_dev->dev,
1096 "L3C data error syndrome 0x%X group 0x%X\n",
1097 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
1098 else
1099 dev_err(edac_dev->dev,
1100 "L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
1101 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
1102 L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
1104 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
1105 * Address [37:6] in l3caelr. Lower 6 bits are zero.
1107 dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
1108 L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
1109 (l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
1110 dev_err(edac_dev->dev,
1111 "L3C error status register value 0x%X\n", l3cesr);
1113 /* Clear L3C error interrupt */
1114 writel(0, ctx->dev_csr + L3C_ESR);
1116 if (ctx->version <= 1 &&
1117 xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
1118 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1119 return;
1121 if (l3cesr & L3C_ESR_CERR_MASK)
1122 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1123 if (l3cesr & L3C_ESR_UCERR_MASK)
1124 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1127 static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
1128 bool enable)
1130 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1131 u32 val;
1133 val = readl(ctx->dev_csr + L3C_ECR);
1134 val |= L3C_UCERREN | L3C_CERREN;
1135 /* On disable, we just disable interrupt but keep error enabled */
1136 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1137 if (enable)
1138 val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
1139 else
1140 val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
1142 writel(val, ctx->dev_csr + L3C_ECR);
1144 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1145 /* Enable/disable L3 error top level interrupt */
1146 if (enable) {
1147 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1148 L3C_UNCORR_ERR_MASK);
1149 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1150 L3C_CORR_ERR_MASK);
1151 } else {
1152 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1153 L3C_UNCORR_ERR_MASK);
1154 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1155 L3C_CORR_ERR_MASK);
1160 static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
1161 const char __user *data,
1162 size_t count, loff_t *ppos)
1164 struct edac_device_ctl_info *edac_dev = file->private_data;
1165 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1167 /* Generate all errors */
1168 writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
1169 return count;
1172 static const struct file_operations xgene_edac_l3_debug_inject_fops = {
1173 .open = simple_open,
1174 .write = xgene_edac_l3_inject_ctrl_write,
1175 .llseek = generic_file_llseek
1178 static void
1179 xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1181 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1182 struct dentry *dbgfs_dir;
1183 char name[10];
1185 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1186 return;
1188 snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1189 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1190 if (!dbgfs_dir)
1191 return;
1193 debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1194 &xgene_edac_l3_debug_inject_fops);
1197 static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
1198 int version)
1200 struct edac_device_ctl_info *edac_dev;
1201 struct xgene_edac_dev_ctx *ctx;
1202 struct resource res;
1203 void __iomem *dev_csr;
1204 int edac_idx;
1205 int rc = 0;
1207 if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
1208 return -ENOMEM;
1210 rc = of_address_to_resource(np, 0, &res);
1211 if (rc < 0) {
1212 dev_err(edac->dev, "no L3 resource address\n");
1213 goto err_release_group;
1215 dev_csr = devm_ioremap_resource(edac->dev, &res);
1216 if (IS_ERR(dev_csr)) {
1217 dev_err(edac->dev,
1218 "devm_ioremap_resource failed for L3 resource address\n");
1219 rc = PTR_ERR(dev_csr);
1220 goto err_release_group;
1223 edac_idx = edac_device_alloc_index();
1224 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1225 "l3c", 1, "l3c", 1, 0, NULL, 0,
1226 edac_idx);
1227 if (!edac_dev) {
1228 rc = -ENOMEM;
1229 goto err_release_group;
1232 ctx = edac_dev->pvt_info;
1233 ctx->dev_csr = dev_csr;
1234 ctx->name = "xgene_l3_err";
1235 ctx->edac_idx = edac_idx;
1236 ctx->edac = edac;
1237 ctx->edac_dev = edac_dev;
1238 ctx->ddev = *edac->dev;
1239 ctx->version = version;
1240 edac_dev->dev = &ctx->ddev;
1241 edac_dev->ctl_name = ctx->name;
1242 edac_dev->dev_name = ctx->name;
1243 edac_dev->mod_name = EDAC_MOD_STR;
1245 if (edac_op_state == EDAC_OPSTATE_POLL)
1246 edac_dev->edac_check = xgene_edac_l3_check;
1248 xgene_edac_l3_create_debugfs_nodes(edac_dev);
1250 rc = edac_device_add_device(edac_dev);
1251 if (rc > 0) {
1252 dev_err(edac->dev, "failed edac_device_add_device()\n");
1253 rc = -ENOMEM;
1254 goto err_ctl_free;
1257 if (edac_op_state == EDAC_OPSTATE_INT)
1258 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1260 list_add(&ctx->next, &edac->l3s);
1262 xgene_edac_l3_hw_init(edac_dev, 1);
1264 devres_remove_group(edac->dev, xgene_edac_l3_add);
1266 dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
1267 return 0;
1269 err_ctl_free:
1270 edac_device_free_ctl_info(edac_dev);
1271 err_release_group:
1272 devres_release_group(edac->dev, xgene_edac_l3_add);
1273 return rc;
1276 static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1278 struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1280 xgene_edac_l3_hw_init(edac_dev, 0);
1281 edac_device_del_device(l3->edac->dev);
1282 edac_device_free_ctl_info(edac_dev);
1283 return 0;
/* SoC error device */

/* IOB AXI slave 0 transaction error registers */
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000c
#define  REQTYPE_RD(src)		((src) & BIT(0))
#define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)

/* IOB AXI slave 1 transaction error registers */
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001c

/* IOB processing agent (PA) transaction error status */
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)

/* IOB bridge agent (BA) transaction error registers */
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003c
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040

/* XGIC transaction error registers */
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03ffffff)

/* IOB memory (single/double bit) error registers */
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083c
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084c

/* IO Bus Registers */
#define RBCSR				0x0000
#define  STICKYERR_MASK			BIT(0)
#define RBEIR				0x0008
#define  AGENT_OFFLINE_ERR_MASK		BIT(30)
#define  UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define  WORD_ALIGNED_ERR_MASK		BIT(28)
#define  PAGE_ACCESS_ERR_MASK		BIT(27)
#define  WRITE_ACCESS_MASK		BIT(26)
#define  RBERRADDR_RD(src)		((src) & 0x03FFFFFF)
/*
 * Names of the SoC memory error sources for version 1 hardware, indexed by
 * bit position in the memory error status word.
 */
static const char * const soc_mem_err_v1[] = {
	[0]  = "10GbE0",
	[1]  = "10GbE1",
	[2]  = "Security",
	[3]  = "SATA45",
	[4]  = "SATA23/ETH23",
	[5]  = "SATA01/ETH01",
	[6]  = "USB1",
	[7]  = "USB0",
	[8]  = "QML",
	[9]  = "QM0",
	[10] = "QM1 (XGbE01)",
	[11] = "PCIE4",
	[12] = "PCIE3",
	[13] = "PCIE2",
	[14] = "PCIE1",
	[15] = "PCIE0",
	[16] = "CTX Manager",
	[17] = "OCM",
	[18] = "1GbE",
	[19] = "CLE",
	[20] = "AHBC",
	[21] = "PktDMA",
	[22] = "GFC",
	[23] = "MSLIM",
	[24] = "10GbE2",
	[25] = "10GbE3",
	[26] = "QM2 (XGbE23)",
	[27] = "IOB",
	[28] = "unknown",
	[29] = "unknown",
	[30] = "unknown",
	[31] = "unknown",
};
1403 static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
1405 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1406 u32 err_addr_lo;
1407 u32 err_addr_hi;
1408 u32 reg;
1409 u32 info;
1411 /* GIC transaction error interrupt */
1412 reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
1413 if (!reg)
1414 goto chk_iob_err;
1415 dev_err(edac_dev->dev, "XGIC transaction error\n");
1416 if (reg & RD_ACCESS_ERR_MASK)
1417 dev_err(edac_dev->dev, "XGIC read size error\n");
1418 if (reg & M_RD_ACCESS_ERR_MASK)
1419 dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
1420 if (reg & WR_ACCESS_ERR_MASK)
1421 dev_err(edac_dev->dev, "XGIC write size error\n");
1422 if (reg & M_WR_ACCESS_ERR_MASK)
1423 dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
1424 info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
1425 dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
1426 info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
1427 info);
1428 writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
1430 chk_iob_err:
1431 /* IOB memory error */
1432 reg = readl(ctx->dev_csr + GLBL_ERR_STS);
1433 if (!reg)
1434 return;
1435 if (reg & SEC_ERR_MASK) {
1436 err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
1437 err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
1438 dev_err(edac_dev->dev,
1439 "IOB single-bit correctable memory at 0x%08X.%08X error\n",
1440 err_addr_lo, err_addr_hi);
1441 writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
1442 writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
1444 if (reg & MSEC_ERR_MASK) {
1445 err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
1446 err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
1447 dev_err(edac_dev->dev,
1448 "IOB multiple single-bit correctable memory at 0x%08X.%08X error\n",
1449 err_addr_lo, err_addr_hi);
1450 writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
1451 writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
1453 if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
1454 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1456 if (reg & DED_ERR_MASK) {
1457 err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
1458 err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
1459 dev_err(edac_dev->dev,
1460 "IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1461 err_addr_lo, err_addr_hi);
1462 writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
1463 writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
1465 if (reg & MDED_ERR_MASK) {
1466 err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
1467 err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
1468 dev_err(edac_dev->dev,
1469 "Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1470 err_addr_lo, err_addr_hi);
1471 writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
1472 writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
1474 if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
1475 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1478 static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
1480 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1481 u32 err_addr_lo;
1482 u32 err_addr_hi;
1483 u32 reg;
1485 /* If the register bus resource isn't available, just skip it */
1486 if (!ctx->edac->rb_map)
1487 goto rb_skip;
1490 * Check RB access errors
1491 * 1. Out of range
1492 * 2. Un-implemented page
1493 * 3. Un-aligned access
1494 * 4. Offline slave IP
1496 if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
1497 return;
1498 if (reg & STICKYERR_MASK) {
1499 bool write;
1500 u32 address;
1502 dev_err(edac_dev->dev, "IOB bus access error(s)\n");
1503 if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
1504 return;
1505 write = reg & WRITE_ACCESS_MASK ? 1 : 0;
1506 address = RBERRADDR_RD(reg);
1507 if (reg & AGENT_OFFLINE_ERR_MASK)
1508 dev_err(edac_dev->dev,
1509 "IOB bus %s access to offline agent error\n",
1510 write ? "write" : "read");
1511 if (reg & UNIMPL_RBPAGE_ERR_MASK)
1512 dev_err(edac_dev->dev,
1513 "IOB bus %s access to unimplemented page error\n",
1514 write ? "write" : "read");
1515 if (reg & WORD_ALIGNED_ERR_MASK)
1516 dev_err(edac_dev->dev,
1517 "IOB bus %s word aligned access error\n",
1518 write ? "write" : "read");
1519 if (reg & PAGE_ACCESS_ERR_MASK)
1520 dev_err(edac_dev->dev,
1521 "IOB bus %s to page out of range access error\n",
1522 write ? "write" : "read");
1523 if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
1524 return;
1525 if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
1526 return;
1528 rb_skip:
1530 /* IOB Bridge agent transaction error interrupt */
1531 reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
1532 if (!reg)
1533 return;
1535 dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
1536 if (reg & WRERR_RESP_MASK)
1537 dev_err(edac_dev->dev, "IOB BA write response error\n");
1538 if (reg & M_WRERR_RESP_MASK)
1539 dev_err(edac_dev->dev,
1540 "Multiple IOB BA write response error\n");
1541 if (reg & XGIC_POISONED_REQ_MASK)
1542 dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
1543 if (reg & M_XGIC_POISONED_REQ_MASK)
1544 dev_err(edac_dev->dev,
1545 "Multiple IOB BA XGIC poisoned write error\n");
1546 if (reg & RBM_POISONED_REQ_MASK)
1547 dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
1548 if (reg & M_RBM_POISONED_REQ_MASK)
1549 dev_err(edac_dev->dev,
1550 "Multiple IOB BA RBM poisoned write error\n");
1551 if (reg & WDATA_CORRUPT_MASK)
1552 dev_err(edac_dev->dev, "IOB BA write error\n");
1553 if (reg & M_WDATA_CORRUPT_MASK)
1554 dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
1555 if (reg & TRANS_CORRUPT_MASK)
1556 dev_err(edac_dev->dev, "IOB BA transaction error\n");
1557 if (reg & M_TRANS_CORRUPT_MASK)
1558 dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
1559 if (reg & RIDRAM_CORRUPT_MASK)
1560 dev_err(edac_dev->dev,
1561 "IOB BA RDIDRAM read transaction ID error\n");
1562 if (reg & M_RIDRAM_CORRUPT_MASK)
1563 dev_err(edac_dev->dev,
1564 "Multiple IOB BA RDIDRAM read transaction ID error\n");
1565 if (reg & WIDRAM_CORRUPT_MASK)
1566 dev_err(edac_dev->dev,
1567 "IOB BA RDIDRAM write transaction ID error\n");
1568 if (reg & M_WIDRAM_CORRUPT_MASK)
1569 dev_err(edac_dev->dev,
1570 "Multiple IOB BA RDIDRAM write transaction ID error\n");
1571 if (reg & ILLEGAL_ACCESS_MASK)
1572 dev_err(edac_dev->dev,
1573 "IOB BA XGIC/RB illegal access error\n");
1574 if (reg & M_ILLEGAL_ACCESS_MASK)
1575 dev_err(edac_dev->dev,
1576 "Multiple IOB BA XGIC/RB illegal access error\n");
1578 err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
1579 err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
1580 dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
1581 REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
1582 ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1583 if (reg & WRERR_RESP_MASK)
1584 dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
1585 readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
1586 writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
1589 static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
1591 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1592 u32 err_addr_lo;
1593 u32 err_addr_hi;
1594 u32 reg;
1596 /* IOB Processing agent transaction error interrupt */
1597 reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
1598 if (!reg)
1599 goto chk_iob_axi0;
1600 dev_err(edac_dev->dev, "IOB procesing agent (PA) transaction error\n");
1601 if (reg & IOBPA_RDATA_CORRUPT_MASK)
1602 dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
1603 if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
1604 dev_err(edac_dev->dev,
1605 "Mutilple IOB PA read data RAM error\n");
1606 if (reg & IOBPA_WDATA_CORRUPT_MASK)
1607 dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
1608 if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
1609 dev_err(edac_dev->dev,
1610 "Mutilple IOB PA write data RAM error\n");
1611 if (reg & IOBPA_TRANS_CORRUPT_MASK)
1612 dev_err(edac_dev->dev, "IOB PA transaction error\n");
1613 if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
1614 dev_err(edac_dev->dev, "Mutilple IOB PA transaction error\n");
1615 if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
1616 dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
1617 if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
1618 dev_err(edac_dev->dev,
1619 "Multiple IOB PA transaction ID RAM error\n");
1620 writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);
1622 chk_iob_axi0:
1623 /* IOB AXI0 Error */
1624 reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1625 if (!reg)
1626 goto chk_iob_axi1;
1627 err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
1628 err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
1629 dev_err(edac_dev->dev,
1630 "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1631 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1632 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1633 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1634 writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1636 chk_iob_axi1:
1637 /* IOB AXI1 Error */
1638 reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1639 if (!reg)
1640 return;
1641 err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
1642 err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
1643 dev_err(edac_dev->dev,
1644 "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1645 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1646 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1647 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1648 writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1651 static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
1653 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1654 const char * const *soc_mem_err = NULL;
1655 u32 pcp_hp_stat;
1656 u32 pcp_lp_stat;
1657 u32 reg;
1658 int i;
1660 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
1661 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
1662 xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
1663 if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
1664 IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
1665 (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
1666 return;
1668 if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
1669 xgene_edac_iob_gic_report(edac_dev);
1671 if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
1672 xgene_edac_rb_report(edac_dev);
1674 if (pcp_hp_stat & IOB_PA_ERR_MASK)
1675 xgene_edac_pa_report(edac_dev);
1677 if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
1678 dev_info(edac_dev->dev,
1679 "CSW switch trace correctable memory parity error\n");
1680 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1683 if (!reg)
1684 return;
1685 if (ctx->version == 1)
1686 soc_mem_err = soc_mem_err_v1;
1687 if (!soc_mem_err) {
1688 dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
1689 reg);
1690 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1691 return;
1693 for (i = 0; i < 31; i++) {
1694 if (reg & (1 << i)) {
1695 dev_err(edac_dev->dev, "%s memory parity error\n",
1696 soc_mem_err[i]);
1697 edac_device_handle_ue(edac_dev, 0, 0,
1698 edac_dev->ctl_name);
1703 static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
1704 bool enable)
1706 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1708 /* Enable SoC IP error interrupt */
1709 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1710 if (enable) {
1711 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1712 IOB_PA_ERR_MASK |
1713 IOB_BA_ERR_MASK |
1714 IOB_XGIC_ERR_MASK |
1715 IOB_RB_ERR_MASK);
1716 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1717 CSW_SWITCH_TRACE_ERR_MASK);
1718 } else {
1719 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1720 IOB_PA_ERR_MASK |
1721 IOB_BA_ERR_MASK |
1722 IOB_XGIC_ERR_MASK |
1723 IOB_RB_ERR_MASK);
1724 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1725 CSW_SWITCH_TRACE_ERR_MASK);
1728 writel(enable ? 0x0 : 0xFFFFFFFF,
1729 ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
1730 writel(enable ? 0x0 : 0xFFFFFFFF,
1731 ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
1732 writel(enable ? 0x0 : 0xFFFFFFFF,
1733 ctx->dev_csr + XGICTRANSERRINTMSK);
1735 xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
1736 enable ? 0x0 : 0xFFFFFFFF);
1740 static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
1741 int version)
1743 struct edac_device_ctl_info *edac_dev;
1744 struct xgene_edac_dev_ctx *ctx;
1745 void __iomem *dev_csr;
1746 struct resource res;
1747 int edac_idx;
1748 int rc;
1750 if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
1751 return -ENOMEM;
1753 rc = of_address_to_resource(np, 0, &res);
1754 if (rc < 0) {
1755 dev_err(edac->dev, "no SoC resource address\n");
1756 goto err_release_group;
1758 dev_csr = devm_ioremap_resource(edac->dev, &res);
1759 if (IS_ERR(dev_csr)) {
1760 dev_err(edac->dev,
1761 "devm_ioremap_resource failed for soc resource address\n");
1762 rc = PTR_ERR(dev_csr);
1763 goto err_release_group;
1766 edac_idx = edac_device_alloc_index();
1767 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1768 "SOC", 1, "SOC", 1, 2, NULL, 0,
1769 edac_idx);
1770 if (!edac_dev) {
1771 rc = -ENOMEM;
1772 goto err_release_group;
1775 ctx = edac_dev->pvt_info;
1776 ctx->dev_csr = dev_csr;
1777 ctx->name = "xgene_soc_err";
1778 ctx->edac_idx = edac_idx;
1779 ctx->edac = edac;
1780 ctx->edac_dev = edac_dev;
1781 ctx->ddev = *edac->dev;
1782 ctx->version = version;
1783 edac_dev->dev = &ctx->ddev;
1784 edac_dev->ctl_name = ctx->name;
1785 edac_dev->dev_name = ctx->name;
1786 edac_dev->mod_name = EDAC_MOD_STR;
1788 if (edac_op_state == EDAC_OPSTATE_POLL)
1789 edac_dev->edac_check = xgene_edac_soc_check;
1791 rc = edac_device_add_device(edac_dev);
1792 if (rc > 0) {
1793 dev_err(edac->dev, "failed edac_device_add_device()\n");
1794 rc = -ENOMEM;
1795 goto err_ctl_free;
1798 if (edac_op_state == EDAC_OPSTATE_INT)
1799 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1801 list_add(&ctx->next, &edac->socs);
1803 xgene_edac_soc_hw_init(edac_dev, 1);
1805 devres_remove_group(edac->dev, xgene_edac_soc_add);
1807 dev_info(edac->dev, "X-Gene EDAC SoC registered\n");
1809 return 0;
1811 err_ctl_free:
1812 edac_device_free_ctl_info(edac_dev);
1813 err_release_group:
1814 devres_release_group(edac->dev, xgene_edac_soc_add);
1815 return rc;
1818 static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
1820 struct edac_device_ctl_info *edac_dev = soc->edac_dev;
1822 xgene_edac_soc_hw_init(edac_dev, 0);
1823 edac_device_del_device(soc->edac->dev);
1824 edac_device_free_ctl_info(edac_dev);
1825 return 0;
1828 static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
1830 struct xgene_edac *ctx = dev_id;
1831 struct xgene_edac_pmd_ctx *pmd;
1832 struct xgene_edac_dev_ctx *node;
1833 unsigned int pcp_hp_stat;
1834 unsigned int pcp_lp_stat;
1836 xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
1837 xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
1838 if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
1839 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
1840 (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
1841 struct xgene_edac_mc_ctx *mcu;
1843 list_for_each_entry(mcu, &ctx->mcus, next)
1844 xgene_edac_mc_check(mcu->mci);
1847 list_for_each_entry(pmd, &ctx->pmds, next) {
1848 if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
1849 xgene_edac_pmd_check(pmd->edac_dev);
1852 list_for_each_entry(node, &ctx->l3s, next)
1853 xgene_edac_l3_check(node->edac_dev);
1855 list_for_each_entry(node, &ctx->socs, next)
1856 xgene_edac_soc_check(node->edac_dev);
1858 return IRQ_HANDLED;
1861 static int xgene_edac_probe(struct platform_device *pdev)
1863 struct xgene_edac *edac;
1864 struct device_node *child;
1865 struct resource *res;
1866 int rc;
1868 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
1869 if (!edac)
1870 return -ENOMEM;
1872 edac->dev = &pdev->dev;
1873 platform_set_drvdata(pdev, edac);
1874 INIT_LIST_HEAD(&edac->mcus);
1875 INIT_LIST_HEAD(&edac->pmds);
1876 INIT_LIST_HEAD(&edac->l3s);
1877 INIT_LIST_HEAD(&edac->socs);
1878 spin_lock_init(&edac->lock);
1879 mutex_init(&edac->mc_lock);
1881 edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1882 "regmap-csw");
1883 if (IS_ERR(edac->csw_map)) {
1884 dev_err(edac->dev, "unable to get syscon regmap csw\n");
1885 rc = PTR_ERR(edac->csw_map);
1886 goto out_err;
1889 edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1890 "regmap-mcba");
1891 if (IS_ERR(edac->mcba_map)) {
1892 dev_err(edac->dev, "unable to get syscon regmap mcba\n");
1893 rc = PTR_ERR(edac->mcba_map);
1894 goto out_err;
1897 edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1898 "regmap-mcbb");
1899 if (IS_ERR(edac->mcbb_map)) {
1900 dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
1901 rc = PTR_ERR(edac->mcbb_map);
1902 goto out_err;
1904 edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1905 "regmap-efuse");
1906 if (IS_ERR(edac->efuse_map)) {
1907 dev_err(edac->dev, "unable to get syscon regmap efuse\n");
1908 rc = PTR_ERR(edac->efuse_map);
1909 goto out_err;
1913 * NOTE: The register bus resource is optional for compatibility
1914 * reason.
1916 edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1917 "regmap-rb");
1918 if (IS_ERR(edac->rb_map)) {
1919 dev_warn(edac->dev, "missing syscon regmap rb\n");
1920 edac->rb_map = NULL;
1923 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1924 edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
1925 if (IS_ERR(edac->pcp_csr)) {
1926 dev_err(&pdev->dev, "no PCP resource address\n");
1927 rc = PTR_ERR(edac->pcp_csr);
1928 goto out_err;
1931 if (edac_op_state == EDAC_OPSTATE_INT) {
1932 int irq;
1933 int i;
1935 for (i = 0; i < 3; i++) {
1936 irq = platform_get_irq(pdev, i);
1937 if (irq < 0) {
1938 dev_err(&pdev->dev, "No IRQ resource\n");
1939 rc = -EINVAL;
1940 goto out_err;
1942 rc = devm_request_irq(&pdev->dev, irq,
1943 xgene_edac_isr, IRQF_SHARED,
1944 dev_name(&pdev->dev), edac);
1945 if (rc) {
1946 dev_err(&pdev->dev,
1947 "Could not request IRQ %d\n", irq);
1948 goto out_err;
1953 edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1955 for_each_child_of_node(pdev->dev.of_node, child) {
1956 if (!of_device_is_available(child))
1957 continue;
1958 if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
1959 xgene_edac_mc_add(edac, child);
1960 if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
1961 xgene_edac_pmd_add(edac, child, 1);
1962 if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
1963 xgene_edac_pmd_add(edac, child, 2);
1964 if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
1965 xgene_edac_l3_add(edac, child, 1);
1966 if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
1967 xgene_edac_l3_add(edac, child, 2);
1968 if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
1969 xgene_edac_soc_add(edac, child, 0);
1970 if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
1971 xgene_edac_soc_add(edac, child, 1);
1974 return 0;
1976 out_err:
1977 return rc;
1980 static int xgene_edac_remove(struct platform_device *pdev)
1982 struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1983 struct xgene_edac_mc_ctx *mcu;
1984 struct xgene_edac_mc_ctx *temp_mcu;
1985 struct xgene_edac_pmd_ctx *pmd;
1986 struct xgene_edac_pmd_ctx *temp_pmd;
1987 struct xgene_edac_dev_ctx *node;
1988 struct xgene_edac_dev_ctx *temp_node;
1990 list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
1991 xgene_edac_mc_remove(mcu);
1993 list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
1994 xgene_edac_pmd_remove(pmd);
1996 list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
1997 xgene_edac_l3_remove(node);
1999 list_for_each_entry_safe(node, temp_node, &edac->socs, next)
2000 xgene_edac_soc_remove(node);
2002 return 0;
2005 static const struct of_device_id xgene_edac_of_match[] = {
2006 { .compatible = "apm,xgene-edac" },
2009 MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
2011 static struct platform_driver xgene_edac_driver = {
2012 .probe = xgene_edac_probe,
2013 .remove = xgene_edac_remove,
2014 .driver = {
2015 .name = "xgene-edac",
2016 .of_match_table = xgene_edac_of_match,
2020 static int __init xgene_edac_init(void)
2022 int rc;
2024 /* Make sure error reporting method is sane */
2025 switch (edac_op_state) {
2026 case EDAC_OPSTATE_POLL:
2027 case EDAC_OPSTATE_INT:
2028 break;
2029 default:
2030 edac_op_state = EDAC_OPSTATE_INT;
2031 break;
2034 rc = platform_driver_register(&xgene_edac_driver);
2035 if (rc) {
2036 edac_printk(KERN_ERR, EDAC_MOD_STR,
2037 "EDAC fails to register\n");
2038 goto reg_failed;
2041 return 0;
2043 reg_failed:
2044 return rc;
2046 module_init(xgene_edac_init);
2048 static void __exit xgene_edac_exit(void)
2050 platform_driver_unregister(&xgene_edac_driver);
2052 module_exit(xgene_edac_exit);
2054 MODULE_LICENSE("GPL");
2055 MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
2056 MODULE_DESCRIPTION("APM X-Gene EDAC driver");
2057 module_param(edac_op_state, int, 0444);
2058 MODULE_PARM_DESC(edac_op_state,
2059 "EDAC error reporting state: 0=Poll, 2=Interrupt");