// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2018 Mellanox Technologies
4 #include <linux/mlx5/driver.h>
10 struct mlx5_event_nb
{
/* General events handlers for the low level mlx5_core driver
 *
 * Other Major feature specific events such as
 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
 * separate notifiers callbacks, specifically by those mlx5 components.
 */
/* Forward declarations for the FW-event handlers registered below in
 * events_nbs_ref[]; all follow the notifier_call signature.
 */
static int any_notifier(struct notifier_block *nb, unsigned long type, void *data);
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data);
static int port_module(struct notifier_block *nb, unsigned long type, void *data);
static int pcie_core(struct notifier_block *nb, unsigned long type, void *data);

/* handler which forwards the event to events->nh, driver notifiers */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data);
29 static struct mlx5_nb events_nbs_ref
[] = {
30 /* Events to be proccessed by mlx5_core */
31 {.nb
.notifier_call
= any_notifier
, .event_type
= MLX5_EVENT_TYPE_NOTIFY_ANY
},
32 {.nb
.notifier_call
= temp_warn
, .event_type
= MLX5_EVENT_TYPE_TEMP_WARN_EVENT
},
33 {.nb
.notifier_call
= port_module
, .event_type
= MLX5_EVENT_TYPE_PORT_MODULE_EVENT
},
34 {.nb
.notifier_call
= pcie_core
, .event_type
= MLX5_EVENT_TYPE_GENERAL_EVENT
},
36 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
37 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_PORT_CHANGE
},
38 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_GENERAL_EVENT
},
39 /* QP/WQ resource events to forward */
40 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_DCT_DRAINED
},
41 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_PATH_MIG
},
42 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_COMM_EST
},
43 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_SQ_DRAINED
},
44 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_SRQ_LAST_WQE
},
45 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_WQ_CATAS_ERROR
},
46 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_PATH_MIG_FAILED
},
47 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR
},
48 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_WQ_ACCESS_ERROR
},
50 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_SRQ_CATAS_ERROR
},
51 {.nb
.notifier_call
= forward_event
, .event_type
= MLX5_EVENT_TYPE_SRQ_RQ_LIMIT
},
55 struct mlx5_core_dev
*dev
;
56 struct workqueue_struct
*wq
;
57 struct mlx5_event_nb notifiers
[ARRAY_SIZE(events_nbs_ref
)];
58 /* driver notifier chain */
59 struct atomic_notifier_head nh
;
60 /* port module events stats */
61 struct mlx5_pme_stats pme_stats
;
63 struct work_struct pcie_core_work
;
66 static const char *eqe_type_str(u8 type
)
69 case MLX5_EVENT_TYPE_COMP
:
70 return "MLX5_EVENT_TYPE_COMP";
71 case MLX5_EVENT_TYPE_PATH_MIG
:
72 return "MLX5_EVENT_TYPE_PATH_MIG";
73 case MLX5_EVENT_TYPE_COMM_EST
:
74 return "MLX5_EVENT_TYPE_COMM_EST";
75 case MLX5_EVENT_TYPE_SQ_DRAINED
:
76 return "MLX5_EVENT_TYPE_SQ_DRAINED";
77 case MLX5_EVENT_TYPE_SRQ_LAST_WQE
:
78 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
79 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT
:
80 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
81 case MLX5_EVENT_TYPE_CQ_ERROR
:
82 return "MLX5_EVENT_TYPE_CQ_ERROR";
83 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR
:
84 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
85 case MLX5_EVENT_TYPE_PATH_MIG_FAILED
:
86 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
87 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR
:
88 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
89 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR
:
90 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
91 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR
:
92 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
93 case MLX5_EVENT_TYPE_INTERNAL_ERROR
:
94 return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
95 case MLX5_EVENT_TYPE_PORT_CHANGE
:
96 return "MLX5_EVENT_TYPE_PORT_CHANGE";
97 case MLX5_EVENT_TYPE_GPIO_EVENT
:
98 return "MLX5_EVENT_TYPE_GPIO_EVENT";
99 case MLX5_EVENT_TYPE_PORT_MODULE_EVENT
:
100 return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
101 case MLX5_EVENT_TYPE_TEMP_WARN_EVENT
:
102 return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
103 case MLX5_EVENT_TYPE_REMOTE_CONFIG
:
104 return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
105 case MLX5_EVENT_TYPE_DB_BF_CONGESTION
:
106 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
107 case MLX5_EVENT_TYPE_STALL_EVENT
:
108 return "MLX5_EVENT_TYPE_STALL_EVENT";
109 case MLX5_EVENT_TYPE_CMD
:
110 return "MLX5_EVENT_TYPE_CMD";
111 case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED
:
112 return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
113 case MLX5_EVENT_TYPE_PAGE_REQUEST
:
114 return "MLX5_EVENT_TYPE_PAGE_REQUEST";
115 case MLX5_EVENT_TYPE_PAGE_FAULT
:
116 return "MLX5_EVENT_TYPE_PAGE_FAULT";
117 case MLX5_EVENT_TYPE_PPS_EVENT
:
118 return "MLX5_EVENT_TYPE_PPS_EVENT";
119 case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE
:
120 return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
121 case MLX5_EVENT_TYPE_FPGA_ERROR
:
122 return "MLX5_EVENT_TYPE_FPGA_ERROR";
123 case MLX5_EVENT_TYPE_FPGA_QP_ERROR
:
124 return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
125 case MLX5_EVENT_TYPE_GENERAL_EVENT
:
126 return "MLX5_EVENT_TYPE_GENERAL_EVENT";
127 case MLX5_EVENT_TYPE_MONITOR_COUNTER
:
128 return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
129 case MLX5_EVENT_TYPE_DEVICE_TRACER
:
130 return "MLX5_EVENT_TYPE_DEVICE_TRACER";
132 return "Unrecognized event";
136 /* handles all FW events, type == eqe->type */
137 static int any_notifier(struct notifier_block
*nb
,
138 unsigned long type
, void *data
)
140 struct mlx5_event_nb
*event_nb
= mlx5_nb_cof(nb
, struct mlx5_event_nb
, nb
);
141 struct mlx5_events
*events
= event_nb
->ctx
;
142 struct mlx5_eqe
*eqe
= data
;
144 mlx5_core_dbg(events
->dev
, "Async eqe type %s, subtype (%d)\n",
145 eqe_type_str(eqe
->type
), eqe
->sub_type
);
149 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
150 static int temp_warn(struct notifier_block
*nb
, unsigned long type
, void *data
)
152 struct mlx5_event_nb
*event_nb
= mlx5_nb_cof(nb
, struct mlx5_event_nb
, nb
);
153 struct mlx5_events
*events
= event_nb
->ctx
;
154 struct mlx5_eqe
*eqe
= data
;
158 value_lsb
= be64_to_cpu(eqe
->data
.temp_warning
.sensor_warning_lsb
);
159 value_msb
= be64_to_cpu(eqe
->data
.temp_warning
.sensor_warning_msb
);
161 mlx5_core_warn(events
->dev
,
162 "High temperature on sensors with bit set %llx %llx",
163 value_msb
, value_lsb
);
168 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
169 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status
)
172 case MLX5_MODULE_STATUS_PLUGGED
:
173 return "Cable plugged";
174 case MLX5_MODULE_STATUS_UNPLUGGED
:
175 return "Cable unplugged";
176 case MLX5_MODULE_STATUS_ERROR
:
177 return "Cable error";
178 case MLX5_MODULE_STATUS_DISABLED
:
179 return "Cable disabled";
181 return "Unknown status";
185 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error
)
188 case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED
:
189 return "Power budget exceeded";
190 case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX
:
191 return "Long Range for non MLNX cable";
192 case MLX5_MODULE_EVENT_ERROR_BUS_STUCK
:
193 return "Bus stuck (I2C or data shorted)";
194 case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT
:
195 return "No EEPROM/retry timeout";
196 case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST
:
197 return "Enforce part number list";
198 case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER
:
199 return "Unknown identifier";
200 case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE
:
201 return "High Temperature";
202 case MLX5_MODULE_EVENT_ERROR_BAD_CABLE
:
203 return "Bad or shorted cable/module";
204 case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED
:
205 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
207 return "Unknown error";
211 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
212 static int port_module(struct notifier_block
*nb
, unsigned long type
, void *data
)
214 struct mlx5_event_nb
*event_nb
= mlx5_nb_cof(nb
, struct mlx5_event_nb
, nb
);
215 struct mlx5_events
*events
= event_nb
->ctx
;
216 struct mlx5_eqe
*eqe
= data
;
218 enum port_module_event_status_type module_status
;
219 enum port_module_event_error_type error_type
;
220 struct mlx5_eqe_port_module
*module_event_eqe
;
221 const char *status_str
;
224 module_event_eqe
= &eqe
->data
.port_module
;
225 module_status
= module_event_eqe
->module_status
&
226 PORT_MODULE_EVENT_MODULE_STATUS_MASK
;
227 error_type
= module_event_eqe
->error_type
&
228 PORT_MODULE_EVENT_ERROR_TYPE_MASK
;
230 if (module_status
< MLX5_MODULE_STATUS_NUM
)
231 events
->pme_stats
.status_counters
[module_status
]++;
233 if (module_status
== MLX5_MODULE_STATUS_ERROR
)
234 if (error_type
< MLX5_MODULE_EVENT_ERROR_NUM
)
235 events
->pme_stats
.error_counters
[error_type
]++;
237 if (!printk_ratelimit())
240 module_num
= module_event_eqe
->module
;
241 status_str
= mlx5_pme_status_to_string(module_status
);
242 if (module_status
== MLX5_MODULE_STATUS_ERROR
) {
243 const char *error_str
= mlx5_pme_error_to_string(error_type
);
245 mlx5_core_err(events
->dev
,
246 "Port module event[error]: module %u, %s, %s\n",
247 module_num
, status_str
, error_str
);
249 mlx5_core_info(events
->dev
,
250 "Port module event: module %u, %s\n",
251 module_num
, status_str
);
/* pwr_status values reported by the MPEIN register. */
enum {
        MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
        MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
        MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
};
263 static void mlx5_pcie_event(struct work_struct
*work
)
265 u32 out
[MLX5_ST_SZ_DW(mpein_reg
)] = {0};
266 u32 in
[MLX5_ST_SZ_DW(mpein_reg
)] = {0};
267 struct mlx5_events
*events
;
268 struct mlx5_core_dev
*dev
;
272 events
= container_of(work
, struct mlx5_events
, pcie_core_work
);
275 if (!MLX5_CAP_MCAM_FEATURE(dev
, pci_status_and_power
))
278 mlx5_core_access_reg(dev
, in
, sizeof(in
), out
, sizeof(out
),
279 MLX5_REG_MPEIN
, 0, 0);
280 power_status
= MLX5_GET(mpein_reg
, out
, pwr_status
);
281 pci_power
= MLX5_GET(mpein_reg
, out
, pci_power
);
283 switch (power_status
) {
284 case MLX5_PCI_POWER_COULD_NOT_BE_READ
:
285 mlx5_core_info_rl(dev
,
286 "PCIe slot power capability was not advertised.\n");
288 case MLX5_PCI_POWER_INSUFFICIENT_REPORTED
:
289 mlx5_core_warn_rl(dev
,
290 "Detected insufficient power on the PCIe slot (%uW).\n",
293 case MLX5_PCI_POWER_SUFFICIENT_REPORTED
:
294 mlx5_core_info_rl(dev
,
295 "PCIe slot advertised sufficient power (%uW).\n",
301 static int pcie_core(struct notifier_block
*nb
, unsigned long type
, void *data
)
303 struct mlx5_event_nb
*event_nb
= mlx5_nb_cof(nb
,
304 struct mlx5_event_nb
,
306 struct mlx5_events
*events
= event_nb
->ctx
;
307 struct mlx5_eqe
*eqe
= data
;
309 switch (eqe
->sub_type
) {
310 case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT
:
311 queue_work(events
->wq
, &events
->pcie_core_work
);
320 void mlx5_get_pme_stats(struct mlx5_core_dev
*dev
, struct mlx5_pme_stats
*stats
)
322 *stats
= dev
->priv
.events
->pme_stats
;
325 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
326 static int forward_event(struct notifier_block
*nb
, unsigned long event
, void *data
)
328 struct mlx5_event_nb
*event_nb
= mlx5_nb_cof(nb
, struct mlx5_event_nb
, nb
);
329 struct mlx5_events
*events
= event_nb
->ctx
;
330 struct mlx5_eqe
*eqe
= data
;
332 mlx5_core_dbg(events
->dev
, "Async eqe type %s, subtype (%d) forward to interfaces\n",
333 eqe_type_str(eqe
->type
), eqe
->sub_type
);
334 atomic_notifier_call_chain(&events
->nh
, event
, data
);
338 int mlx5_events_init(struct mlx5_core_dev
*dev
)
340 struct mlx5_events
*events
= kzalloc(sizeof(*events
), GFP_KERNEL
);
345 ATOMIC_INIT_NOTIFIER_HEAD(&events
->nh
);
347 dev
->priv
.events
= events
;
348 events
->wq
= create_singlethread_workqueue("mlx5_events");
353 INIT_WORK(&events
->pcie_core_work
, mlx5_pcie_event
);
358 void mlx5_events_cleanup(struct mlx5_core_dev
*dev
)
360 destroy_workqueue(dev
->priv
.events
->wq
);
361 kvfree(dev
->priv
.events
);
364 void mlx5_events_start(struct mlx5_core_dev
*dev
)
366 struct mlx5_events
*events
= dev
->priv
.events
;
369 for (i
= 0; i
< ARRAY_SIZE(events_nbs_ref
); i
++) {
370 events
->notifiers
[i
].nb
= events_nbs_ref
[i
];
371 events
->notifiers
[i
].ctx
= events
;
372 mlx5_eq_notifier_register(dev
, &events
->notifiers
[i
].nb
);
376 void mlx5_events_stop(struct mlx5_core_dev
*dev
)
378 struct mlx5_events
*events
= dev
->priv
.events
;
381 for (i
= ARRAY_SIZE(events_nbs_ref
) - 1; i
>= 0 ; i
--)
382 mlx5_eq_notifier_unregister(dev
, &events
->notifiers
[i
].nb
);
383 flush_workqueue(events
->wq
);
386 int mlx5_notifier_register(struct mlx5_core_dev
*dev
, struct notifier_block
*nb
)
388 struct mlx5_events
*events
= dev
->priv
.events
;
390 return atomic_notifier_chain_register(&events
->nh
, nb
);
392 EXPORT_SYMBOL(mlx5_notifier_register
);
394 int mlx5_notifier_unregister(struct mlx5_core_dev
*dev
, struct notifier_block
*nb
)
396 struct mlx5_events
*events
= dev
->priv
.events
;
398 return atomic_notifier_chain_unregister(&events
->nh
, nb
);
400 EXPORT_SYMBOL(mlx5_notifier_unregister
);
402 int mlx5_notifier_call_chain(struct mlx5_events
*events
, unsigned int event
, void *data
)
404 return atomic_notifier_call_chain(&events
->nh
, event
, data
);