1 // SPDX-License-Identifier: GPL-2.0
/*
 * Bluefield-specific EDAC driver.
 *
 * Copyright (c) 2019 Mellanox Technologies.
 */
#include <linux/acpi.h>
#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/edac.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>

#include "edac_module.h"
18 #define DRIVER_NAME "bluefield-edac"
/*
 * Mellanox BlueField EMI (External Memory Interface) register definitions.
 */
24 #define MLXBF_ECC_CNT 0x340
25 #define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0)
26 #define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16)
28 #define MLXBF_ECC_ERR 0x348
29 #define MLXBF_ECC_ERR__SECC BIT(0)
30 #define MLXBF_ECC_ERR__DECC BIT(16)
32 #define MLXBF_ECC_LATCH_SEL 0x354
33 #define MLXBF_ECC_LATCH_SEL__START BIT(24)
35 #define MLXBF_ERR_ADDR_0 0x358
37 #define MLXBF_ERR_ADDR_1 0x37c
39 #define MLXBF_SYNDROM 0x35c
40 #define MLXBF_SYNDROM__DERR BIT(0)
41 #define MLXBF_SYNDROM__SERR BIT(1)
42 #define MLXBF_SYNDROM__SYN GENMASK(25, 16)
44 #define MLXBF_ADD_INFO 0x364
45 #define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8)
47 #define MLXBF_EDAC_MAX_DIMM_PER_MC 2
48 #define MLXBF_EDAC_ERROR_GRAIN 8
/*
 * Request MLNX_SIP_GET_DIMM_INFO
 *
 * Retrieve information about DIMM on a certain slot.
 *
 * Call register usage:
 * a0: MLNX_SIP_GET_DIMM_INFO
 * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
 *
 * Return register usage:
 * a0: MLXBF_DIMM_INFO defined below describing the DIMM.
 */
64 #define MLNX_SIP_GET_DIMM_INFO 0x82000008
66 /* Format for the SMC response about the memory information */
67 #define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0)
68 #define MLXBF_DIMM_INFO__IS_RDIMM BIT(16)
69 #define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17)
70 #define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18)
71 #define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21)
72 #define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24)
74 struct bluefield_edac_priv
{
75 int dimm_ranks
[MLXBF_EDAC_MAX_DIMM_PER_MC
];
76 void __iomem
*emi_base
;
80 static u64
smc_call1(u64 smc_op
, u64 smc_arg
)
82 struct arm_smccc_res res
;
84 arm_smccc_smc(smc_op
, smc_arg
, 0, 0, 0, 0, 0, 0, &res
);
90 * Gather the ECC information from the External Memory Interface registers
91 * and report it to the edac handler.
93 static void bluefield_gather_report_ecc(struct mem_ctl_info
*mci
,
97 struct bluefield_edac_priv
*priv
= mci
->pvt_info
;
98 u32 dram_additional_info
, err_prank
, edea0
, edea1
;
99 u32 ecc_latch_select
, dram_syndrom
, serr
, derr
, syndrom
;
100 enum hw_event_mc_err_type ecc_type
;
104 ecc_type
= is_single_ecc
? HW_EVENT_ERR_CORRECTED
:
105 HW_EVENT_ERR_UNCORRECTED
;
108 * Tell the External Memory Interface to populate the relevant
109 * registers with information about the last ECC error occurrence.
111 ecc_latch_select
= MLXBF_ECC_LATCH_SEL__START
;
112 writel(ecc_latch_select
, priv
->emi_base
+ MLXBF_ECC_LATCH_SEL
);
115 * Verify that the ECC reported info in the registers is of the
116 * same type as the one asked to report. If not, just report the
117 * error without the detailed information.
119 dram_syndrom
= readl(priv
->emi_base
+ MLXBF_SYNDROM
);
120 serr
= FIELD_GET(MLXBF_SYNDROM__SERR
, dram_syndrom
);
121 derr
= FIELD_GET(MLXBF_SYNDROM__DERR
, dram_syndrom
);
122 syndrom
= FIELD_GET(MLXBF_SYNDROM__SYN
, dram_syndrom
);
124 if ((is_single_ecc
&& !serr
) || (!is_single_ecc
&& !derr
)) {
125 edac_mc_handle_error(ecc_type
, mci
, error_cnt
, 0, 0, 0,
126 0, 0, -1, mci
->ctl_name
, "");
130 dram_additional_info
= readl(priv
->emi_base
+ MLXBF_ADD_INFO
);
131 err_prank
= FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK
, dram_additional_info
);
133 ecc_dimm
= (err_prank
>= 2 && priv
->dimm_ranks
[0] <= 2) ? 1 : 0;
135 edea0
= readl(priv
->emi_base
+ MLXBF_ERR_ADDR_0
);
136 edea1
= readl(priv
->emi_base
+ MLXBF_ERR_ADDR_1
);
138 ecc_dimm_addr
= ((u64
)edea1
<< 32) | edea0
;
140 edac_mc_handle_error(ecc_type
, mci
, error_cnt
,
141 PFN_DOWN(ecc_dimm_addr
),
142 offset_in_page(ecc_dimm_addr
),
143 syndrom
, ecc_dimm
, 0, 0, mci
->ctl_name
, "");
146 static void bluefield_edac_check(struct mem_ctl_info
*mci
)
148 struct bluefield_edac_priv
*priv
= mci
->pvt_info
;
149 u32 ecc_count
, single_error_count
, double_error_count
, ecc_error
= 0;
152 * The memory controller might not be initialized by the firmware
153 * when there isn't memory, which may lead to bad register readings.
155 if (mci
->edac_cap
== EDAC_FLAG_NONE
)
158 ecc_count
= readl(priv
->emi_base
+ MLXBF_ECC_CNT
);
159 single_error_count
= FIELD_GET(MLXBF_ECC_CNT__SERR_CNT
, ecc_count
);
160 double_error_count
= FIELD_GET(MLXBF_ECC_CNT__DERR_CNT
, ecc_count
);
162 if (single_error_count
) {
163 ecc_error
|= MLXBF_ECC_ERR__SECC
;
165 bluefield_gather_report_ecc(mci
, single_error_count
, 1);
168 if (double_error_count
) {
169 ecc_error
|= MLXBF_ECC_ERR__DECC
;
171 bluefield_gather_report_ecc(mci
, double_error_count
, 0);
174 /* Write to clear reported errors. */
176 writel(ecc_error
, priv
->emi_base
+ MLXBF_ECC_ERR
);
179 /* Initialize the DIMMs information for the given memory controller. */
180 static void bluefield_edac_init_dimms(struct mem_ctl_info
*mci
)
182 struct bluefield_edac_priv
*priv
= mci
->pvt_info
;
183 int mem_ctrl_idx
= mci
->mc_idx
;
184 struct dimm_info
*dimm
;
185 u64 smc_info
, smc_arg
;
188 for (i
= 0; i
< priv
->dimm_per_mc
; i
++) {
189 dimm
= mci
->dimms
[i
];
191 smc_arg
= mem_ctrl_idx
<< 16 | i
;
192 smc_info
= smc_call1(MLNX_SIP_GET_DIMM_INFO
, smc_arg
);
194 if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB
, smc_info
)) {
195 dimm
->mtype
= MEM_EMPTY
;
201 dimm
->edac_mode
= EDAC_SECDED
;
203 if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM
, smc_info
))
204 dimm
->mtype
= MEM_NVDIMM
;
205 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM
, smc_info
))
206 dimm
->mtype
= MEM_LRDDR4
;
207 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM
, smc_info
))
208 dimm
->mtype
= MEM_RDDR4
;
210 dimm
->mtype
= MEM_DDR4
;
213 FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB
, smc_info
) *
215 dimm
->grain
= MLXBF_EDAC_ERROR_GRAIN
;
217 /* Mem controller for BlueField only supports x4, x8 and x16 */
218 switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X
, smc_info
)) {
220 dimm
->dtype
= DEV_X4
;
223 dimm
->dtype
= DEV_X8
;
226 dimm
->dtype
= DEV_X16
;
229 dimm
->dtype
= DEV_UNKNOWN
;
232 priv
->dimm_ranks
[i
] =
233 FIELD_GET(MLXBF_DIMM_INFO__RANKS
, smc_info
);
237 mci
->edac_cap
= EDAC_FLAG_NONE
;
239 mci
->edac_cap
= EDAC_FLAG_SECDED
;
242 static int bluefield_edac_mc_probe(struct platform_device
*pdev
)
244 struct bluefield_edac_priv
*priv
;
245 struct device
*dev
= &pdev
->dev
;
246 struct edac_mc_layer layers
[1];
247 struct mem_ctl_info
*mci
;
248 struct resource
*emi_res
;
249 unsigned int mc_idx
, dimm_count
;
252 /* Read the MSS (Memory SubSystem) index from ACPI table. */
253 if (device_property_read_u32(dev
, "mss_number", &mc_idx
)) {
254 dev_warn(dev
, "bf_edac: MSS number unknown\n");
258 /* Read the DIMMs per MC from ACPI table. */
259 if (device_property_read_u32(dev
, "dimm_per_mc", &dimm_count
)) {
260 dev_warn(dev
, "bf_edac: DIMMs per MC unknown\n");
264 if (dimm_count
> MLXBF_EDAC_MAX_DIMM_PER_MC
) {
265 dev_warn(dev
, "bf_edac: DIMMs per MC not valid\n");
269 emi_res
= platform_get_resource(pdev
, IORESOURCE_MEM
, 0);
273 layers
[0].type
= EDAC_MC_LAYER_SLOT
;
274 layers
[0].size
= dimm_count
;
275 layers
[0].is_virt_csrow
= true;
277 mci
= edac_mc_alloc(mc_idx
, ARRAY_SIZE(layers
), layers
, sizeof(*priv
));
281 priv
= mci
->pvt_info
;
283 priv
->dimm_per_mc
= dimm_count
;
284 priv
->emi_base
= devm_ioremap_resource(dev
, emi_res
);
285 if (IS_ERR(priv
->emi_base
)) {
286 dev_err(dev
, "failed to map EMI IO resource\n");
287 ret
= PTR_ERR(priv
->emi_base
);
292 mci
->mtype_cap
= MEM_FLAG_DDR4
| MEM_FLAG_RDDR4
|
293 MEM_FLAG_LRDDR4
| MEM_FLAG_NVDIMM
;
294 mci
->edac_ctl_cap
= EDAC_FLAG_SECDED
;
296 mci
->mod_name
= DRIVER_NAME
;
297 mci
->ctl_name
= "BlueField_Memory_Controller";
298 mci
->dev_name
= dev_name(dev
);
299 mci
->edac_check
= bluefield_edac_check
;
301 /* Initialize mci with the actual populated DIMM information. */
302 bluefield_edac_init_dimms(mci
);
304 platform_set_drvdata(pdev
, mci
);
306 /* Register with EDAC core */
307 rc
= edac_mc_add_mc(mci
);
309 dev_err(dev
, "failed to register with EDAC core\n");
314 /* Only POLL mode supported so far. */
315 edac_op_state
= EDAC_OPSTATE_POLL
;
326 static int bluefield_edac_mc_remove(struct platform_device
*pdev
)
328 struct mem_ctl_info
*mci
= platform_get_drvdata(pdev
);
330 edac_mc_del_mc(&pdev
->dev
);
/*
 * ACPI match table for this driver. It is exported through
 * MODULE_DEVICE_TABLE() and referenced by the platform driver's
 * acpi_match_table.
 *
 * NOTE(review): the table entries and the closing brace are not visible
 * here. Restore the ACPI HID entry and the empty terminating entry from
 * the authoritative copy of this file; do not guess the HID.
 */
336 static const struct acpi_device_id bluefield_mc_acpi_ids
[] = {
341 MODULE_DEVICE_TABLE(acpi
, bluefield_mc_acpi_ids
);
343 static struct platform_driver bluefield_edac_mc_driver
= {
346 .acpi_match_table
= bluefield_mc_acpi_ids
,
348 .probe
= bluefield_edac_mc_probe
,
349 .remove
= bluefield_edac_mc_remove
,
352 module_platform_driver(bluefield_edac_mc_driver
);
354 MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
355 MODULE_AUTHOR("Mellanox Technologies");
356 MODULE_LICENSE("GPL v2");