// SPDX-License-Identifier: GPL-2.0
/*
 * Bluefield-specific EDAC driver.
 *
 * Copyright (c) 2019 Mellanox Technologies.
 */
#include <linux/acpi.h>
#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/edac.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>

#include "edac_module.h"
#define DRIVER_NAME		"bluefield-edac"

/*
 * Mellanox BlueField EMI (External Memory Interface) register definitions.
 */

/* ECC error counters: single-bit count in [15:0], double-bit count in [31:16]. */
#define MLXBF_ECC_CNT 0x340
#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16)

/* ECC error register; the driver writes these bits to clear reported errors. */
#define MLXBF_ECC_ERR 0x348
#define MLXBF_ECC_ERR__SECC BIT(0)
#define MLXBF_ECC_ERR__DECC BIT(16)

/* Writing START asks the EMI to latch details of the last ECC error. */
#define MLXBF_ECC_LATCH_SEL 0x354
#define MLXBF_ECC_LATCH_SEL__START BIT(24)

/* Low 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_0 0x358

/* High 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_1 0x37c

/* Latched error type flags and syndrome. */
#define MLXBF_SYNDROM 0x35c
#define MLXBF_SYNDROM__DERR BIT(0)
#define MLXBF_SYNDROM__SERR BIT(1)
#define MLXBF_SYNDROM__SYN GENMASK(25, 16)

/* Additional latched error information (physical rank of the error). */
#define MLXBF_ADD_INFO 0x364
#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8)

#define MLXBF_EDAC_MAX_DIMM_PER_MC	2
#define MLXBF_EDAC_ERROR_GRAIN		8

/* SMC function IDs used for indirect (secure) register access. */
#define MLXBF_WRITE_REG_32		(0x82000009)
#define MLXBF_READ_REG_32		(0x8200000A)
#define MLXBF_SIP_SVC_VERSION		(0x8200ff03)

/* SMC status returned when the register access is refused. */
#define MLXBF_SMCCC_ACCESS_VIOLATION	(-4)

/* Service version compared against the MLXBF_SIP_SVC_VERSION reply. */
#define MLXBF_SVC_REQ_MAJOR		0
#define MLXBF_SVC_REQ_MINOR		3

/*
 * Request MLXBF_SIP_GET_DIMM_INFO
 *
 * Retrieve information about DIMM on a certain slot.
 *
 * Call register usage:
 * a0: MLXBF_SIP_GET_DIMM_INFO
 * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
 *
 * Return register usage:
 * a0: MLXBF_DIMM_INFO defined below describing the DIMM.
 */
#define MLXBF_SIP_GET_DIMM_INFO		0x82000008

/* Format for the SMC response about the memory information */
#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18)
#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24)
83 struct bluefield_edac_priv
{
84 /* pointer to device structure */
86 int dimm_ranks
[MLXBF_EDAC_MAX_DIMM_PER_MC
];
87 void __iomem
*emi_base
;
89 /* access to secure regs supported */
90 bool svc_sreg_support
;
91 /* SMC table# for secure regs access */
95 static u64
smc_call1(u64 smc_op
, u64 smc_arg
)
97 struct arm_smccc_res res
;
99 arm_smccc_smc(smc_op
, smc_arg
, 0, 0, 0, 0, 0, 0, &res
);
104 static int secure_readl(void __iomem
*addr
, u32
*result
, u32 sreg_tbl
)
106 struct arm_smccc_res res
;
109 arm_smccc_smc(MLXBF_READ_REG_32
, sreg_tbl
, (uintptr_t)addr
,
110 0, 0, 0, 0, 0, &res
);
114 if (status
== SMCCC_RET_NOT_SUPPORTED
||
115 status
== MLXBF_SMCCC_ACCESS_VIOLATION
)
118 *result
= (u32
)res
.a1
;
122 static int secure_writel(void __iomem
*addr
, u32 data
, u32 sreg_tbl
)
124 struct arm_smccc_res res
;
127 arm_smccc_smc(MLXBF_WRITE_REG_32
, sreg_tbl
, data
, (uintptr_t)addr
,
132 if (status
== SMCCC_RET_NOT_SUPPORTED
||
133 status
== MLXBF_SMCCC_ACCESS_VIOLATION
)
139 static int bluefield_edac_readl(struct bluefield_edac_priv
*priv
, u32 offset
, u32
*result
)
144 addr
= priv
->emi_base
+ offset
;
146 if (priv
->svc_sreg_support
)
147 err
= secure_readl(addr
, result
, priv
->sreg_tbl
);
149 *result
= readl(addr
);
154 static int bluefield_edac_writel(struct bluefield_edac_priv
*priv
, u32 offset
, u32 data
)
159 addr
= priv
->emi_base
+ offset
;
161 if (priv
->svc_sreg_support
)
162 err
= secure_writel(addr
, data
, priv
->sreg_tbl
);
170 * Gather the ECC information from the External Memory Interface registers
171 * and report it to the edac handler.
173 static void bluefield_gather_report_ecc(struct mem_ctl_info
*mci
,
177 struct bluefield_edac_priv
*priv
= mci
->pvt_info
;
178 u32 dram_additional_info
, err_prank
, edea0
, edea1
;
179 u32 ecc_latch_select
, dram_syndrom
, serr
, derr
, syndrom
;
180 enum hw_event_mc_err_type ecc_type
;
184 ecc_type
= is_single_ecc
? HW_EVENT_ERR_CORRECTED
:
185 HW_EVENT_ERR_UNCORRECTED
;
188 * Tell the External Memory Interface to populate the relevant
189 * registers with information about the last ECC error occurrence.
191 ecc_latch_select
= MLXBF_ECC_LATCH_SEL__START
;
192 err
= bluefield_edac_writel(priv
, MLXBF_ECC_LATCH_SEL
, ecc_latch_select
);
194 dev_err(priv
->dev
, "ECC latch select write failed.\n");
197 * Verify that the ECC reported info in the registers is of the
198 * same type as the one asked to report. If not, just report the
199 * error without the detailed information.
201 err
= bluefield_edac_readl(priv
, MLXBF_SYNDROM
, &dram_syndrom
);
203 dev_err(priv
->dev
, "DRAM syndrom read failed.\n");
205 serr
= FIELD_GET(MLXBF_SYNDROM__SERR
, dram_syndrom
);
206 derr
= FIELD_GET(MLXBF_SYNDROM__DERR
, dram_syndrom
);
207 syndrom
= FIELD_GET(MLXBF_SYNDROM__SYN
, dram_syndrom
);
209 if ((is_single_ecc
&& !serr
) || (!is_single_ecc
&& !derr
)) {
210 edac_mc_handle_error(ecc_type
, mci
, error_cnt
, 0, 0, 0,
211 0, 0, -1, mci
->ctl_name
, "");
215 err
= bluefield_edac_readl(priv
, MLXBF_ADD_INFO
, &dram_additional_info
);
217 dev_err(priv
->dev
, "DRAM additional info read failed.\n");
219 err_prank
= FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK
, dram_additional_info
);
221 ecc_dimm
= (err_prank
>= 2 && priv
->dimm_ranks
[0] <= 2) ? 1 : 0;
223 err
= bluefield_edac_readl(priv
, MLXBF_ERR_ADDR_0
, &edea0
);
225 dev_err(priv
->dev
, "Error addr 0 read failed.\n");
227 err
= bluefield_edac_readl(priv
, MLXBF_ERR_ADDR_1
, &edea1
);
229 dev_err(priv
->dev
, "Error addr 1 read failed.\n");
231 ecc_dimm_addr
= ((u64
)edea1
<< 32) | edea0
;
233 edac_mc_handle_error(ecc_type
, mci
, error_cnt
,
234 PFN_DOWN(ecc_dimm_addr
),
235 offset_in_page(ecc_dimm_addr
),
236 syndrom
, ecc_dimm
, 0, 0, mci
->ctl_name
, "");
239 static void bluefield_edac_check(struct mem_ctl_info
*mci
)
241 struct bluefield_edac_priv
*priv
= mci
->pvt_info
;
242 u32 ecc_count
, single_error_count
, double_error_count
, ecc_error
= 0;
246 * The memory controller might not be initialized by the firmware
247 * when there isn't memory, which may lead to bad register readings.
249 if (mci
->edac_cap
== EDAC_FLAG_NONE
)
252 err
= bluefield_edac_readl(priv
, MLXBF_ECC_CNT
, &ecc_count
);
254 dev_err(priv
->dev
, "ECC count read failed.\n");
256 single_error_count
= FIELD_GET(MLXBF_ECC_CNT__SERR_CNT
, ecc_count
);
257 double_error_count
= FIELD_GET(MLXBF_ECC_CNT__DERR_CNT
, ecc_count
);
259 if (single_error_count
) {
260 ecc_error
|= MLXBF_ECC_ERR__SECC
;
262 bluefield_gather_report_ecc(mci
, single_error_count
, 1);
265 if (double_error_count
) {
266 ecc_error
|= MLXBF_ECC_ERR__DECC
;
268 bluefield_gather_report_ecc(mci
, double_error_count
, 0);
271 /* Write to clear reported errors. */
273 err
= bluefield_edac_writel(priv
, MLXBF_ECC_ERR
, ecc_error
);
275 dev_err(priv
->dev
, "ECC Error write failed.\n");
279 /* Initialize the DIMMs information for the given memory controller. */
280 static void bluefield_edac_init_dimms(struct mem_ctl_info
*mci
)
282 struct bluefield_edac_priv
*priv
= mci
->pvt_info
;
283 u64 mem_ctrl_idx
= mci
->mc_idx
;
284 struct dimm_info
*dimm
;
285 u64 smc_info
, smc_arg
;
288 for (i
= 0; i
< priv
->dimm_per_mc
; i
++) {
289 dimm
= mci
->dimms
[i
];
291 smc_arg
= mem_ctrl_idx
<< 16 | i
;
292 smc_info
= smc_call1(MLXBF_SIP_GET_DIMM_INFO
, smc_arg
);
294 if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB
, smc_info
)) {
295 dimm
->mtype
= MEM_EMPTY
;
301 dimm
->edac_mode
= EDAC_SECDED
;
303 if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM
, smc_info
))
304 dimm
->mtype
= MEM_NVDIMM
;
305 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM
, smc_info
))
306 dimm
->mtype
= MEM_LRDDR4
;
307 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM
, smc_info
))
308 dimm
->mtype
= MEM_RDDR4
;
310 dimm
->mtype
= MEM_DDR4
;
313 FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB
, smc_info
) *
315 dimm
->grain
= MLXBF_EDAC_ERROR_GRAIN
;
317 /* Mem controller for BlueField only supports x4, x8 and x16 */
318 switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X
, smc_info
)) {
320 dimm
->dtype
= DEV_X4
;
323 dimm
->dtype
= DEV_X8
;
326 dimm
->dtype
= DEV_X16
;
329 dimm
->dtype
= DEV_UNKNOWN
;
332 priv
->dimm_ranks
[i
] =
333 FIELD_GET(MLXBF_DIMM_INFO__RANKS
, smc_info
);
337 mci
->edac_cap
= EDAC_FLAG_NONE
;
339 mci
->edac_cap
= EDAC_FLAG_SECDED
;
342 static int bluefield_edac_mc_probe(struct platform_device
*pdev
)
344 struct bluefield_edac_priv
*priv
;
345 struct device
*dev
= &pdev
->dev
;
346 struct edac_mc_layer layers
[1];
347 struct arm_smccc_res res
;
348 struct mem_ctl_info
*mci
;
349 struct resource
*emi_res
;
350 unsigned int mc_idx
, dimm_count
;
353 /* Read the MSS (Memory SubSystem) index from ACPI table. */
354 if (device_property_read_u32(dev
, "mss_number", &mc_idx
)) {
355 dev_warn(dev
, "bf_edac: MSS number unknown\n");
359 /* Read the DIMMs per MC from ACPI table. */
360 if (device_property_read_u32(dev
, "dimm_per_mc", &dimm_count
)) {
361 dev_warn(dev
, "bf_edac: DIMMs per MC unknown\n");
365 if (dimm_count
> MLXBF_EDAC_MAX_DIMM_PER_MC
) {
366 dev_warn(dev
, "bf_edac: DIMMs per MC not valid\n");
370 emi_res
= platform_get_resource(pdev
, IORESOURCE_MEM
, 0);
374 layers
[0].type
= EDAC_MC_LAYER_SLOT
;
375 layers
[0].size
= dimm_count
;
376 layers
[0].is_virt_csrow
= true;
378 mci
= edac_mc_alloc(mc_idx
, ARRAY_SIZE(layers
), layers
, sizeof(*priv
));
382 priv
= mci
->pvt_info
;
386 * The "sec_reg_block" property in the ACPI table determines the method
387 * the driver uses to access the EMI registers:
388 * a) property is not present - directly access registers via readl/writel
389 * b) property is present - indirectly access registers via SMC calls
390 * (assuming required Silicon Provider service version found)
392 if (device_property_read_u32(dev
, "sec_reg_block", &priv
->sreg_tbl
)) {
393 priv
->svc_sreg_support
= false;
396 * Check for minimum required Arm Silicon Provider (SiP) service
397 * version, ensuring support of required SMC function IDs.
399 arm_smccc_smc(MLXBF_SIP_SVC_VERSION
, 0, 0, 0, 0, 0, 0, 0, &res
);
400 if (res
.a0
== MLXBF_SVC_REQ_MAJOR
&&
401 res
.a1
>= MLXBF_SVC_REQ_MINOR
) {
402 priv
->svc_sreg_support
= true;
404 dev_err(dev
, "Required SMCs are not supported.\n");
410 priv
->dimm_per_mc
= dimm_count
;
411 if (!priv
->svc_sreg_support
) {
412 priv
->emi_base
= devm_ioremap_resource(dev
, emi_res
);
413 if (IS_ERR(priv
->emi_base
)) {
414 dev_err(dev
, "failed to map EMI IO resource\n");
415 ret
= PTR_ERR(priv
->emi_base
);
419 priv
->emi_base
= (void __iomem
*)emi_res
->start
;
423 mci
->mtype_cap
= MEM_FLAG_DDR4
| MEM_FLAG_RDDR4
|
424 MEM_FLAG_LRDDR4
| MEM_FLAG_NVDIMM
;
425 mci
->edac_ctl_cap
= EDAC_FLAG_SECDED
;
427 mci
->mod_name
= DRIVER_NAME
;
428 mci
->ctl_name
= "BlueField_Memory_Controller";
429 mci
->dev_name
= dev_name(dev
);
430 mci
->edac_check
= bluefield_edac_check
;
432 /* Initialize mci with the actual populated DIMM information. */
433 bluefield_edac_init_dimms(mci
);
435 platform_set_drvdata(pdev
, mci
);
437 /* Register with EDAC core */
438 rc
= edac_mc_add_mc(mci
);
440 dev_err(dev
, "failed to register with EDAC core\n");
445 /* Only POLL mode supported so far. */
446 edac_op_state
= EDAC_OPSTATE_POLL
;
456 static void bluefield_edac_mc_remove(struct platform_device
*pdev
)
458 struct mem_ctl_info
*mci
= platform_get_drvdata(pdev
);
460 edac_mc_del_mc(&pdev
->dev
);
/*
 * ACPI match table referenced by the platform driver's .acpi_match_table and
 * exported for module autoloading via MODULE_DEVICE_TABLE().
 * NOTE(review): the table entries and the closing "};" are not visible in
 * this copy of the file - restore them from the original driver source.
 */
464 static const struct acpi_device_id bluefield_mc_acpi_ids
[] = {
469 MODULE_DEVICE_TABLE(acpi
, bluefield_mc_acpi_ids
);
471 static struct platform_driver bluefield_edac_mc_driver
= {
474 .acpi_match_table
= bluefield_mc_acpi_ids
,
476 .probe
= bluefield_edac_mc_probe
,
477 .remove_new
= bluefield_edac_mc_remove
,
480 module_platform_driver(bluefield_edac_mc_driver
);
482 MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
483 MODULE_AUTHOR("Mellanox Technologies");
484 MODULE_LICENSE("GPL v2");