1 // SPDX-License-Identifier: GPL-2.0-only
3 * Ampere Computing SoC's SMpro Error Monitoring Driver
5 * Copyright (c) 2022, Ampere Computing LLC
9 #include <linux/mod_devicetable.h>
10 #include <linux/module.h>
11 #include <linux/platform_device.h>
12 #include <linux/regmap.h>
14 /* GPI RAS Error Registers */
15 #define GPI_RAS_ERR 0x7E
17 /* Core and L2C Error Registers */
18 #define CORE_CE_ERR_CNT 0x80
19 #define CORE_CE_ERR_LEN 0x81
20 #define CORE_CE_ERR_DATA 0x82
21 #define CORE_UE_ERR_CNT 0x83
22 #define CORE_UE_ERR_LEN 0x84
23 #define CORE_UE_ERR_DATA 0x85
25 /* Memory Error Registers */
26 #define MEM_CE_ERR_CNT 0x90
27 #define MEM_CE_ERR_LEN 0x91
28 #define MEM_CE_ERR_DATA 0x92
29 #define MEM_UE_ERR_CNT 0x93
30 #define MEM_UE_ERR_LEN 0x94
31 #define MEM_UE_ERR_DATA 0x95
33 /* RAS Error/Warning Registers */
34 #define ERR_SMPRO_TYPE 0xA0
35 #define ERR_PMPRO_TYPE 0xA1
36 #define ERR_SMPRO_INFO_LO 0xA2
37 #define ERR_SMPRO_INFO_HI 0xA3
38 #define ERR_SMPRO_DATA_LO 0xA4
39 #define ERR_SMPRO_DATA_HI 0xA5
40 #define WARN_SMPRO_INFO_LO 0xAA
41 #define WARN_SMPRO_INFO_HI 0xAB
42 #define ERR_PMPRO_INFO_LO 0xA6
43 #define ERR_PMPRO_INFO_HI 0xA7
44 #define ERR_PMPRO_DATA_LO 0xA8
45 #define ERR_PMPRO_DATA_HI 0xA9
46 #define WARN_PMPRO_INFO_LO 0xAC
47 #define WARN_PMPRO_INFO_HI 0xAD
49 /* Boot Stage Register */
50 #define BOOTSTAGE 0xB0
51 #define DIMM_SYNDROME_SEL 0xB4
52 #define DIMM_SYNDROME_ERR 0xB5
53 #define DIMM_SYNDROME_STAGE 4
55 /* PCIE Error Registers */
56 #define PCIE_CE_ERR_CNT 0xC0
57 #define PCIE_CE_ERR_LEN 0xC1
58 #define PCIE_CE_ERR_DATA 0xC2
59 #define PCIE_UE_ERR_CNT 0xC3
60 #define PCIE_UE_ERR_LEN 0xC4
61 #define PCIE_UE_ERR_DATA 0xC5
63 /* Other Error Registers */
64 #define OTHER_CE_ERR_CNT 0xD0
65 #define OTHER_CE_ERR_LEN 0xD1
66 #define OTHER_CE_ERR_DATA 0xD2
67 #define OTHER_UE_ERR_CNT 0xD8
68 #define OTHER_UE_ERR_LEN 0xD9
69 #define OTHER_UE_ERR_DATA 0xDA
71 /* Event Data Registers */
72 #define VRD_WARN_FAULT_EVENT_DATA 0x78
73 #define VRD_HOT_EVENT_DATA 0x79
74 #define DIMM_HOT_EVENT_DATA 0x7A
75 #define DIMM_2X_REFRESH_EVENT_DATA 0x96
77 #define MAX_READ_BLOCK_LENGTH 48
79 #define RAS_SMPRO_ERR 0
80 #define RAS_PMPRO_ERR 1
82 enum RAS_48BYTES_ERR_TYPES
{
94 struct smpro_error_hdr
{
95 u8 count
; /* Number of the RAS errors */
96 u8 len
; /* Number of data bytes */
97 u8 data
; /* Start of 48-byte data */
98 u8 max_cnt
; /* Max num of errors */
102 * Addresses of the registers that hold the count, the data length and the data
103 * of the 48-byte error records
105 static struct smpro_error_hdr smpro_error_table
[] = {
107 .count
= CORE_CE_ERR_CNT
,
108 .len
= CORE_CE_ERR_LEN
,
109 .data
= CORE_CE_ERR_DATA
,
113 .count
= CORE_UE_ERR_CNT
,
114 .len
= CORE_UE_ERR_LEN
,
115 .data
= CORE_UE_ERR_DATA
,
119 .count
= MEM_CE_ERR_CNT
,
120 .len
= MEM_CE_ERR_LEN
,
121 .data
= MEM_CE_ERR_DATA
,
125 .count
= MEM_UE_ERR_CNT
,
126 .len
= MEM_UE_ERR_LEN
,
127 .data
= MEM_UE_ERR_DATA
,
131 .count
= PCIE_CE_ERR_CNT
,
132 .len
= PCIE_CE_ERR_LEN
,
133 .data
= PCIE_CE_ERR_DATA
,
137 .count
= PCIE_UE_ERR_CNT
,
138 .len
= PCIE_UE_ERR_LEN
,
139 .data
= PCIE_UE_ERR_DATA
,
143 .count
= OTHER_CE_ERR_CNT
,
144 .len
= OTHER_CE_ERR_LEN
,
145 .data
= OTHER_CE_ERR_DATA
,
149 .count
= OTHER_UE_ERR_CNT
,
150 .len
= OTHER_UE_ERR_LEN
,
151 .data
= OTHER_UE_ERR_DATA
,
157 * List of SCP registers which are used to get
158 * one type of RAS internal error.
160 struct smpro_int_error_hdr
{
170 static struct smpro_int_error_hdr list_smpro_int_error_hdr
[] = {
172 .type
= ERR_SMPRO_TYPE
,
173 .info_l
= ERR_SMPRO_INFO_LO
,
174 .info_h
= ERR_SMPRO_INFO_HI
,
175 .data_l
= ERR_SMPRO_DATA_LO
,
176 .data_h
= ERR_SMPRO_DATA_HI
,
177 .warn_l
= WARN_SMPRO_INFO_LO
,
178 .warn_h
= WARN_SMPRO_INFO_HI
,
181 .type
= ERR_PMPRO_TYPE
,
182 .info_l
= ERR_PMPRO_INFO_LO
,
183 .info_h
= ERR_PMPRO_INFO_HI
,
184 .data_l
= ERR_PMPRO_DATA_LO
,
185 .data_h
= ERR_PMPRO_DATA_HI
,
186 .warn_l
= WARN_PMPRO_INFO_LO
,
187 .warn_h
= WARN_PMPRO_INFO_HI
,
191 struct smpro_errmon
{
192 struct regmap
*regmap
;
196 VRD_WARN_FAULT_EVENT
,
199 DIMM_2X_REFRESH_EVENT
,
203 /* Addresses of the event source and data registers */
204 static u8 smpro_event_table
[NUM_EVENTS_TYPE
] = {
205 VRD_WARN_FAULT_EVENT_DATA
,
208 DIMM_2X_REFRESH_EVENT_DATA
,
211 static ssize_t
smpro_event_data_read(struct device
*dev
,
212 struct device_attribute
*da
, char *buf
,
215 struct smpro_errmon
*errmon
= dev_get_drvdata(dev
);
219 ret
= regmap_read(errmon
->regmap
, smpro_event_table
[channel
], &event_data
);
222 /* Clear event after read */
224 regmap_write(errmon
->regmap
, smpro_event_table
[channel
], event_data
);
226 return sysfs_emit(buf
, "%04x\n", event_data
);
229 static ssize_t
smpro_overflow_data_read(struct device
*dev
, struct device_attribute
*da
,
230 char *buf
, int channel
)
232 struct smpro_errmon
*errmon
= dev_get_drvdata(dev
);
233 struct smpro_error_hdr
*err_info
;
237 err_info
= &smpro_error_table
[channel
];
239 ret
= regmap_read(errmon
->regmap
, err_info
->count
, &err_count
);
243 /* Bit 8 indicates the overflow status */
244 return sysfs_emit(buf
, "%d\n", (err_count
& BIT(8)) ? 1 : 0);
247 static ssize_t
smpro_error_data_read(struct device
*dev
, struct device_attribute
*da
,
248 char *buf
, int channel
)
250 struct smpro_errmon
*errmon
= dev_get_drvdata(dev
);
251 unsigned char err_data
[MAX_READ_BLOCK_LENGTH
];
252 struct smpro_error_hdr
*err_info
;
253 s32 err_count
, err_length
;
256 err_info
= &smpro_error_table
[channel
];
258 ret
= regmap_read(errmon
->regmap
, err_info
->count
, &err_count
);
259 /* Error count is the low byte */
261 if (ret
|| !err_count
|| err_count
> err_info
->max_cnt
)
264 ret
= regmap_read(errmon
->regmap
, err_info
->len
, &err_length
);
265 if (ret
|| err_length
<= 0)
268 if (err_length
> MAX_READ_BLOCK_LENGTH
)
269 err_length
= MAX_READ_BLOCK_LENGTH
;
271 memset(err_data
, 0x00, MAX_READ_BLOCK_LENGTH
);
272 ret
= regmap_noinc_read(errmon
->regmap
, err_info
->data
, err_data
, err_length
);
276 /* clear the error */
277 ret
= regmap_write(errmon
->regmap
, err_info
->count
, 0x100);
281 * The output of Core/Memory/PCIe/Others UE/CE errors follows the format
282 * specified in section 5.8.1 CE/UE Error Data record in
283 * Altra SOC BMC Interface specification.
285 return sysfs_emit(buf
, "%*phN\n", MAX_READ_BLOCK_LENGTH
, err_data
);
290 * <4-byte hex value of error info><4-byte hex value of error extensive data>
292 * + error info : The error information
293 * + error data : Extensive data (32 bits)
294 * Refer to section 5.10 RAS Internal Error Register Definition in
295 * Altra SOC BMC Interface specification
297 static ssize_t
smpro_internal_err_read(struct device
*dev
, struct device_attribute
*da
,
298 char *buf
, int channel
)
300 struct smpro_errmon
*errmon
= dev_get_drvdata(dev
);
301 struct smpro_int_error_hdr
*err_info
;
302 unsigned int err
[4] = { 0 };
303 unsigned int err_type
;
307 /* read error status */
308 ret
= regmap_read(errmon
->regmap
, GPI_RAS_ERR
, &val
);
312 if ((channel
== RAS_SMPRO_ERR
&& !(val
& BIT(0))) ||
313 (channel
== RAS_PMPRO_ERR
&& !(val
& BIT(1))))
316 err_info
= &list_smpro_int_error_hdr
[channel
];
317 ret
= regmap_read(errmon
->regmap
, err_info
->type
, &val
);
321 err_type
= (val
& BIT(1)) ? BIT(1) :
322 (val
& BIT(2)) ? BIT(2) : 0;
327 ret
= regmap_read(errmon
->regmap
, err_info
->info_l
, err
+ 1);
331 ret
= regmap_read(errmon
->regmap
, err_info
->info_h
, err
);
335 if (err_type
& BIT(2)) {
336 /* Error with data type */
337 ret
= regmap_read(errmon
->regmap
, err_info
->data_l
, err
+ 3);
341 ret
= regmap_read(errmon
->regmap
, err_info
->data_h
, err
+ 2);
346 /* clear the read errors */
347 ret
= regmap_write(errmon
->regmap
, err_info
->type
, err_type
);
351 return sysfs_emit(buf
, "%*phN\n", (int)sizeof(err
), err
);
356 * <4-byte hex value of warning info>
357 * Refer to section 5.10 RAS Internal Error Register Definition in
358 * Altra SOC BMC Interface specification
360 static ssize_t
smpro_internal_warn_read(struct device
*dev
, struct device_attribute
*da
,
361 char *buf
, int channel
)
363 struct smpro_errmon
*errmon
= dev_get_drvdata(dev
);
364 struct smpro_int_error_hdr
*err_info
;
365 unsigned int warn
[2] = { 0 };
369 /* read error status */
370 ret
= regmap_read(errmon
->regmap
, GPI_RAS_ERR
, &val
);
374 if ((channel
== RAS_SMPRO_ERR
&& !(val
& BIT(0))) ||
375 (channel
== RAS_PMPRO_ERR
&& !(val
& BIT(1))))
378 err_info
= &list_smpro_int_error_hdr
[channel
];
379 ret
= regmap_read(errmon
->regmap
, err_info
->type
, &val
);
386 ret
= regmap_read(errmon
->regmap
, err_info
->warn_l
, warn
+ 1);
390 ret
= regmap_read(errmon
->regmap
, err_info
->warn_h
, warn
);
394 /* clear the warning */
395 ret
= regmap_write(errmon
->regmap
, err_info
->type
, BIT(0));
399 return sysfs_emit(buf
, "%*phN\n", (int)sizeof(warn
), warn
);
402 #define ERROR_OVERFLOW_RO(_error, _index) \
403 static ssize_t overflow_##_error##_show(struct device *dev, \
404 struct device_attribute *da, \
407 return smpro_overflow_data_read(dev, da, buf, _index); \
409 static DEVICE_ATTR_RO(overflow_##_error)
411 ERROR_OVERFLOW_RO(core_ce
, CORE_CE_ERR
);
412 ERROR_OVERFLOW_RO(core_ue
, CORE_UE_ERR
);
413 ERROR_OVERFLOW_RO(mem_ce
, MEM_CE_ERR
);
414 ERROR_OVERFLOW_RO(mem_ue
, MEM_UE_ERR
);
415 ERROR_OVERFLOW_RO(pcie_ce
, PCIE_CE_ERR
);
416 ERROR_OVERFLOW_RO(pcie_ue
, PCIE_UE_ERR
);
417 ERROR_OVERFLOW_RO(other_ce
, OTHER_CE_ERR
);
418 ERROR_OVERFLOW_RO(other_ue
, OTHER_UE_ERR
);
420 #define ERROR_RO(_error, _index) \
421 static ssize_t error_##_error##_show(struct device *dev, \
422 struct device_attribute *da, \
425 return smpro_error_data_read(dev, da, buf, _index); \
427 static DEVICE_ATTR_RO(error_##_error)
429 ERROR_RO(core_ce
, CORE_CE_ERR
);
430 ERROR_RO(core_ue
, CORE_UE_ERR
);
431 ERROR_RO(mem_ce
, MEM_CE_ERR
);
432 ERROR_RO(mem_ue
, MEM_UE_ERR
);
433 ERROR_RO(pcie_ce
, PCIE_CE_ERR
);
434 ERROR_RO(pcie_ue
, PCIE_UE_ERR
);
435 ERROR_RO(other_ce
, OTHER_CE_ERR
);
436 ERROR_RO(other_ue
, OTHER_UE_ERR
);
438 static ssize_t
error_smpro_show(struct device
*dev
, struct device_attribute
*da
, char *buf
)
440 return smpro_internal_err_read(dev
, da
, buf
, RAS_SMPRO_ERR
);
442 static DEVICE_ATTR_RO(error_smpro
);
444 static ssize_t
error_pmpro_show(struct device
*dev
, struct device_attribute
*da
, char *buf
)
446 return smpro_internal_err_read(dev
, da
, buf
, RAS_PMPRO_ERR
);
448 static DEVICE_ATTR_RO(error_pmpro
);
450 static ssize_t
warn_smpro_show(struct device
*dev
, struct device_attribute
*da
, char *buf
)
452 return smpro_internal_warn_read(dev
, da
, buf
, RAS_SMPRO_ERR
);
454 static DEVICE_ATTR_RO(warn_smpro
);
456 static ssize_t
warn_pmpro_show(struct device
*dev
, struct device_attribute
*da
, char *buf
)
458 return smpro_internal_warn_read(dev
, da
, buf
, RAS_PMPRO_ERR
);
460 static DEVICE_ATTR_RO(warn_pmpro
);
462 #define EVENT_RO(_event, _index) \
463 static ssize_t event_##_event##_show(struct device *dev, \
464 struct device_attribute *da, \
467 return smpro_event_data_read(dev, da, buf, _index); \
469 static DEVICE_ATTR_RO(event_##_event)
471 EVENT_RO(vrd_warn_fault
, VRD_WARN_FAULT_EVENT
);
472 EVENT_RO(vrd_hot
, VRD_HOT_EVENT
);
473 EVENT_RO(dimm_hot
, DIMM_HOT_EVENT
);
474 EVENT_RO(dimm_2x_refresh
, DIMM_2X_REFRESH_EVENT
);
476 static ssize_t
smpro_dimm_syndrome_read(struct device
*dev
, struct device_attribute
*da
,
477 char *buf
, unsigned int slot
)
479 struct smpro_errmon
*errmon
= dev_get_drvdata(dev
);
483 ret
= regmap_read(errmon
->regmap
, BOOTSTAGE
, &data
);
487 /* check for valid stage */
488 data
= (data
>> 8) & 0xff;
489 if (data
!= DIMM_SYNDROME_STAGE
)
492 /* Write the slot ID to retrieve Error Syndrome */
493 ret
= regmap_write(errmon
->regmap
, DIMM_SYNDROME_SEL
, slot
);
497 /* Read the Syndrome error */
498 ret
= regmap_read(errmon
->regmap
, DIMM_SYNDROME_ERR
, &data
);
502 return sysfs_emit(buf
, "%04x\n", data
);
505 #define EVENT_DIMM_SYNDROME(_slot) \
506 static ssize_t event_dimm##_slot##_syndrome_show(struct device *dev, \
507 struct device_attribute *da, \
510 return smpro_dimm_syndrome_read(dev, da, buf, _slot); \
512 static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome)
514 EVENT_DIMM_SYNDROME(0);
515 EVENT_DIMM_SYNDROME(1);
516 EVENT_DIMM_SYNDROME(2);
517 EVENT_DIMM_SYNDROME(3);
518 EVENT_DIMM_SYNDROME(4);
519 EVENT_DIMM_SYNDROME(5);
520 EVENT_DIMM_SYNDROME(6);
521 EVENT_DIMM_SYNDROME(7);
522 EVENT_DIMM_SYNDROME(8);
523 EVENT_DIMM_SYNDROME(9);
524 EVENT_DIMM_SYNDROME(10);
525 EVENT_DIMM_SYNDROME(11);
526 EVENT_DIMM_SYNDROME(12);
527 EVENT_DIMM_SYNDROME(13);
528 EVENT_DIMM_SYNDROME(14);
529 EVENT_DIMM_SYNDROME(15);
531 static struct attribute
*smpro_errmon_attrs
[] = {
532 &dev_attr_overflow_core_ce
.attr
,
533 &dev_attr_overflow_core_ue
.attr
,
534 &dev_attr_overflow_mem_ce
.attr
,
535 &dev_attr_overflow_mem_ue
.attr
,
536 &dev_attr_overflow_pcie_ce
.attr
,
537 &dev_attr_overflow_pcie_ue
.attr
,
538 &dev_attr_overflow_other_ce
.attr
,
539 &dev_attr_overflow_other_ue
.attr
,
540 &dev_attr_error_core_ce
.attr
,
541 &dev_attr_error_core_ue
.attr
,
542 &dev_attr_error_mem_ce
.attr
,
543 &dev_attr_error_mem_ue
.attr
,
544 &dev_attr_error_pcie_ce
.attr
,
545 &dev_attr_error_pcie_ue
.attr
,
546 &dev_attr_error_other_ce
.attr
,
547 &dev_attr_error_other_ue
.attr
,
548 &dev_attr_error_smpro
.attr
,
549 &dev_attr_error_pmpro
.attr
,
550 &dev_attr_warn_smpro
.attr
,
551 &dev_attr_warn_pmpro
.attr
,
552 &dev_attr_event_vrd_warn_fault
.attr
,
553 &dev_attr_event_vrd_hot
.attr
,
554 &dev_attr_event_dimm_hot
.attr
,
555 &dev_attr_event_dimm_2x_refresh
.attr
,
556 &dev_attr_event_dimm0_syndrome
.attr
,
557 &dev_attr_event_dimm1_syndrome
.attr
,
558 &dev_attr_event_dimm2_syndrome
.attr
,
559 &dev_attr_event_dimm3_syndrome
.attr
,
560 &dev_attr_event_dimm4_syndrome
.attr
,
561 &dev_attr_event_dimm5_syndrome
.attr
,
562 &dev_attr_event_dimm6_syndrome
.attr
,
563 &dev_attr_event_dimm7_syndrome
.attr
,
564 &dev_attr_event_dimm8_syndrome
.attr
,
565 &dev_attr_event_dimm9_syndrome
.attr
,
566 &dev_attr_event_dimm10_syndrome
.attr
,
567 &dev_attr_event_dimm11_syndrome
.attr
,
568 &dev_attr_event_dimm12_syndrome
.attr
,
569 &dev_attr_event_dimm13_syndrome
.attr
,
570 &dev_attr_event_dimm14_syndrome
.attr
,
571 &dev_attr_event_dimm15_syndrome
.attr
,
575 ATTRIBUTE_GROUPS(smpro_errmon
);
577 static int smpro_errmon_probe(struct platform_device
*pdev
)
579 struct smpro_errmon
*errmon
;
581 errmon
= devm_kzalloc(&pdev
->dev
, sizeof(struct smpro_errmon
), GFP_KERNEL
);
585 platform_set_drvdata(pdev
, errmon
);
587 errmon
->regmap
= dev_get_regmap(pdev
->dev
.parent
, NULL
);
594 static struct platform_driver smpro_errmon_driver
= {
595 .probe
= smpro_errmon_probe
,
597 .name
= "smpro-errmon",
598 .dev_groups
= smpro_errmon_groups
,
602 module_platform_driver(smpro_errmon_driver
);
604 MODULE_AUTHOR("Tung Nguyen <tung.nguyen@amperecomputing.com>");
605 MODULE_AUTHOR("Thinh Pham <thinh.pham@amperecomputing.com>");
606 MODULE_AUTHOR("Hoang Nguyen <hnguyen@amperecomputing.com>");
607 MODULE_AUTHOR("Thu Nguyen <thu@os.amperecomputing.com>");
608 MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>");
609 MODULE_DESCRIPTION("Ampere Altra SMpro driver");
610 MODULE_LICENSE("GPL");