1 // SPDX-License-Identifier: GPL-2.0
/*
 * This file implements error recovery as a core part of PCIe error
 * reporting. When a PCIe error is delivered, an error message is
 * collected and printed to the console; an error recovery procedure
 * is then executed following the PCI error recovery rules.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 */
13 #include <linux/pci.h>
14 #include <linux/module.h>
15 #include <linux/pci.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/aer.h>
/*
 * Context passed (as the void *data argument) to the per-device callbacks
 * while a bus is walked.
 *
 * state:  channel state reported to each device; report_error_detected
 *         copies it into dev->error_state and hands it to the driver.
 * result: running recovery result; each callback folds its vote into it
 *         through merge_result.
 *
 * NOTE(review): the closing brace of this struct is not present in this
 * copy of the file; lines appear to have been lost during extraction.
 */
22 struct aer_broadcast_data
{
23 enum pci_channel_state state
;
24 enum pci_ers_result result
;
/*
 * merge_result - fold one device vote into the accumulated result
 * orig: result accumulated so far
 * new:  vote produced for the device currently being visited
 *
 * Rules that are visible here:
 *  - a PCI_ERS_RESULT_NO_AER_DRIVER vote is returned immediately,
 *    regardless of orig;
 *  - when orig is PCI_ERS_RESULT_DISCONNECT, a PCI_ERS_RESULT_NEED_RESET
 *    vote replaces it with PCI_ERS_RESULT_NEED_RESET.
 *
 * NOTE(review): the statement guarded by the PCI_ERS_RESULT_NONE test, the
 * switch header, the body of the CAN_RECOVER/RECOVERED cases and the final
 * return are missing from this listing (the embedded numbering jumps
 * 33 -> 37 and 38 -> 41). Presumably a NONE vote leaves orig unchanged and
 * the CAN_RECOVER/RECOVERED cases adopt the new vote; confirm against a
 * pristine copy of the file.
 */
27 static pci_ers_result_t
merge_result(enum pci_ers_result orig
,
28 enum pci_ers_result
new)
30 if (new == PCI_ERS_RESULT_NO_AER_DRIVER
)
31 return PCI_ERS_RESULT_NO_AER_DRIVER
;
33 if (new == PCI_ERS_RESULT_NONE
)
37 case PCI_ERS_RESULT_CAN_RECOVER
:
38 case PCI_ERS_RESULT_RECOVERED
:
41 case PCI_ERS_RESULT_DISCONNECT
:
42 if (new == PCI_ERS_RESULT_NEED_RESET
)
43 orig
= PCI_ERS_RESULT_NEED_RESET
;
/*
 * report_error_detected - bus-walk callback announcing an error to one device
 * dev:  device being visited
 * data: struct aer_broadcast_data; supplies the channel state and receives
 *       the merged vote
 *
 * Between device_lock and device_unlock on dev->dev this:
 *  - stores the broadcast channel state in dev->error_state;
 *  - checks whether the bound driver has an err_handler with an
 *    error_detected callback. If it does, the callback is invoked with the
 *    channel state and a PCI_ERS_RESULT_NONE uevent is sent. If it does
 *    not, the vote is PCI_ERS_RESULT_NO_AER_DRIVER for a non-bridge device;
 *    the PCI_ERS_RESULT_NONE assignment that follows is presumably the
 *    else branch taken for bridges;
 *  - merges the vote into result_data->result with merge_result.
 *
 * NOTE(review): the opening of the if condition, both else lines, the
 * closing braces and the return statement are absent from this listing;
 * the branch structure described above is inferred from the surviving
 * lines and should be confirmed against a pristine copy.
 */
52 static int report_error_detected(struct pci_dev
*dev
, void *data
)
54 pci_ers_result_t vote
;
55 const struct pci_error_handlers
*err_handler
;
56 struct aer_broadcast_data
*result_data
;
58 result_data
= (struct aer_broadcast_data
*) data
;
60 device_lock(&dev
->dev
);
61 dev
->error_state
= result_data
->state
;
64 !dev
->driver
->err_handler
||
65 !dev
->driver
->err_handler
->error_detected
) {
67 * If any device in the subtree does not have an error_detected
68 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
69 * error callbacks of "any" device in the subtree, and will
70 * exit in the disconnected error state.
72 if (dev
->hdr_type
!= PCI_HEADER_TYPE_BRIDGE
)
73 vote
= PCI_ERS_RESULT_NO_AER_DRIVER
;
75 vote
= PCI_ERS_RESULT_NONE
;
77 err_handler
= dev
->driver
->err_handler
;
78 vote
= err_handler
->error_detected(dev
, result_data
->state
);
79 pci_uevent_ers(dev
, PCI_ERS_RESULT_NONE
);
82 result_data
->result
= merge_result(result_data
->result
, vote
);
83 device_unlock(&dev
->dev
);
/*
 * report_mmio_enabled - bus-walk callback for the mmio_enabled recovery step
 * dev:  device being visited
 * data: struct aer_broadcast_data that receives the merged vote
 *
 * With dev->dev locked, verifies that the bound driver has an err_handler
 * providing an mmio_enabled callback, invokes that callback, and merges
 * its return value into result_data->result via merge_result. The device
 * lock is released before leaving.
 *
 * NOTE(review): the first line of the handler check, the statement run
 * when the check fails (numbering jumps 98 -> 101) and the return are
 * missing from this listing. Presumably a device without the callback is
 * skipped straight to the unlock; confirm against a pristine copy.
 */
87 static int report_mmio_enabled(struct pci_dev
*dev
, void *data
)
89 pci_ers_result_t vote
;
90 const struct pci_error_handlers
*err_handler
;
91 struct aer_broadcast_data
*result_data
;
93 result_data
= (struct aer_broadcast_data
*) data
;
95 device_lock(&dev
->dev
);
97 !dev
->driver
->err_handler
||
98 !dev
->driver
->err_handler
->mmio_enabled
)
101 err_handler
= dev
->driver
->err_handler
;
102 vote
= err_handler
->mmio_enabled(dev
);
103 result_data
->result
= merge_result(result_data
->result
, vote
);
105 device_unlock(&dev
->dev
);
/*
 * report_slot_reset - bus-walk callback for the slot_reset recovery step
 * dev:  device being visited
 * data: struct aer_broadcast_data that receives the merged vote
 *
 * With dev->dev locked, verifies that the bound driver has an err_handler
 * providing a slot_reset callback, invokes that callback, and merges its
 * return value into result_data->result via merge_result. The device lock
 * is released before leaving.
 *
 * NOTE(review): the first line of the handler check, the statement run
 * when the check fails (numbering jumps 120 -> 123) and the return are
 * missing from this listing. Presumably a device without the callback is
 * skipped straight to the unlock; confirm against a pristine copy.
 */
109 static int report_slot_reset(struct pci_dev
*dev
, void *data
)
111 pci_ers_result_t vote
;
112 const struct pci_error_handlers
*err_handler
;
113 struct aer_broadcast_data
*result_data
;
115 result_data
= (struct aer_broadcast_data
*) data
;
117 device_lock(&dev
->dev
);
119 !dev
->driver
->err_handler
||
120 !dev
->driver
->err_handler
->slot_reset
)
123 err_handler
= dev
->driver
->err_handler
;
124 vote
= err_handler
->slot_reset(dev
);
125 result_data
->result
= merge_result(result_data
->result
, vote
);
127 device_unlock(&dev
->dev
);
/*
 * report_resume - bus-walk callback telling one device that it may resume
 * dev:  device being visited
 * data: not referenced by any line visible in this listing
 *
 * With dev->dev locked, resets dev->error_state to pci_channel_io_normal,
 * verifies that the bound driver has an err_handler providing a resume
 * callback, invokes it, and sends a PCI_ERS_RESULT_RECOVERED uevent. The
 * device lock is released before leaving.
 *
 * NOTE(review): the first line of the handler check, the statement run
 * when the check fails (numbering jumps 140 -> 143) and the return are
 * missing from this listing; whether the uevent is also sent for devices
 * without a resume callback cannot be determined from here.
 */
131 static int report_resume(struct pci_dev
*dev
, void *data
)
133 const struct pci_error_handlers
*err_handler
;
135 device_lock(&dev
->dev
);
136 dev
->error_state
= pci_channel_io_normal
;
139 !dev
->driver
->err_handler
||
140 !dev
->driver
->err_handler
->resume
)
143 err_handler
= dev
->driver
->err_handler
;
144 err_handler
->resume(dev
);
145 pci_uevent_ers(dev
, PCI_ERS_RESULT_RECOVERED
);
147 device_unlock(&dev
->dev
);
/**
 * default_reset_link - default reset function
 * @dev: pointer to pci_dev data structure
 *
 * Invoked when performing a link reset on a Downstream Port or a
 * Root Port with no AER driver.
 */
/*
 * Fallback link reset: calls pci_bus_error_reset on dev, logs a
 * KERN_DEBUG message, and maps the return code to a recovery result
 * (non-zero -> PCI_ERS_RESULT_DISCONNECT, zero -> PCI_ERS_RESULT_RECOVERED).
 *
 * The debug message is printed whether or not the reset succeeded.
 *
 * NOTE(review): the declaration of rc and the function braces are missing
 * from this listing (numbering jumps 158 -> 162).
 */
158 static pci_ers_result_t
default_reset_link(struct pci_dev
*dev
)
162 rc
= pci_bus_error_reset(dev
);
163 pci_printk(KERN_DEBUG
, dev
, "downstream link has been reset\n");
164 return rc
? PCI_ERS_RESULT_DISCONNECT
: PCI_ERS_RESULT_RECOVERED
;
/*
 * reset_link - reset the link below a port, choosing the reset method
 * dev:     port whose link is to be reset
 * service: port service id used to look up a service driver
 *
 * Selection order visible here:
 *  1. the service driver found by pcie_port_find_service, if it provides a
 *     reset_link callback;
 *  2. otherwise default_reset_link, when dev->has_secondary_link is set;
 *  3. otherwise a debug message is logged and PCI_ERS_RESULT_DISCONNECT is
 *     returned.
 * Any status other than PCI_ERS_RESULT_RECOVERED from steps 1 or 2 is
 * logged and converted to PCI_ERS_RESULT_DISCONNECT.
 *
 * NOTE(review): the final else line, the argument lines supplying the %s
 * value of both messages (numbering jumps 178 -> 180 and 184 -> 186) and
 * the final return are missing from this listing; presumably the function
 * ends by returning status.
 */
167 static pci_ers_result_t
reset_link(struct pci_dev
*dev
, u32 service
)
169 pci_ers_result_t status
;
170 struct pcie_port_service_driver
*driver
= NULL
;
172 driver
= pcie_port_find_service(dev
, service
);
173 if (driver
&& driver
->reset_link
) {
174 status
= driver
->reset_link(dev
);
175 } else if (dev
->has_secondary_link
) {
176 status
= default_reset_link(dev
);
178 pci_printk(KERN_DEBUG
, dev
, "no link-reset support at upstream device %s\n",
180 return PCI_ERS_RESULT_DISCONNECT
;
183 if (status
!= PCI_ERS_RESULT_RECOVERED
) {
184 pci_printk(KERN_DEBUG
, dev
, "link reset at upstream device %s failed\n",
186 return PCI_ERS_RESULT_DISCONNECT
;
/**
 * broadcast_error_message - broadcast an error message to downstream drivers
 * @dev: pointer to the device from which the message is broadcast down the
 *       hierarchy
 * @state: error state
 * @error_mesg: message to print
 * @cb: callback to be broadcast
 *
 * Invoked during the error recovery process. Once invoked, the error
 * severity is broadcast to all downstream drivers in the hierarchy in
 * question.
 */
/*
 * Runs cb on every device below dev and returns the merged result.
 *
 * Steps visible here:
 *  - logs the name of the message being broadcast at KERN_DEBUG;
 *  - stores state in the walk context;
 *  - seeds the context result with PCI_ERS_RESULT_CAN_RECOVER when cb is
 *    report_error_detected; the PCI_ERS_RESULT_RECOVERED assignment that
 *    follows is presumably the else branch used for every other callback;
 *  - walks dev->subordinate with pci_walk_bus, passing the context;
 *  - returns the accumulated result.
 *
 * NOTE(review): the declaration of the error_mesg parameter (numbering
 * jumps 204 -> 206), the else line and the function braces are missing
 * from this listing.
 */
203 static pci_ers_result_t
broadcast_error_message(struct pci_dev
*dev
,
204 enum pci_channel_state state
,
206 int (*cb
)(struct pci_dev
*, void *))
208 struct aer_broadcast_data result_data
;
210 pci_printk(KERN_DEBUG
, dev
, "broadcast %s message\n", error_mesg
);
211 result_data
.state
= state
;
212 if (cb
== report_error_detected
)
213 result_data
.result
= PCI_ERS_RESULT_CAN_RECOVER
;
215 result_data
.result
= PCI_ERS_RESULT_RECOVERED
;
217 pci_walk_bus(dev
->subordinate
, cb
, &result_data
);
218 return result_data
.result
;
/**
 * pcie_do_fatal_recovery - handle fatal error recovery process
 * @dev: pointer to a pci_dev data structure of agent detecting an error
 * @service: port service id; passed on to reset_link() and compared with
 *           PCIE_PORT_SERVICE_AER
 *
 * Invoked when an error is fatal. Once invoked, removes the devices
 * beneath this AER agent, then resets the link (e.g. by a secondary bus
 * reset), and finally re-enumerates the devices.
 */
/*
 * Fatal-error recovery entry point.
 * dev:     device that detected/reported the error
 * service: port service id forwarded to reset_link
 *
 * Sequence visible here:
 *  - picks the port to operate on (udev); for a non-bridge reporter this is
 *    dev->bus->self, and the bridge case is on a line that is not visible;
 *  - takes the rescan/remove lock, then walks the devices on
 *    udev->subordinate in reverse, marking each one (and everything on its
 *    subordinate bus, if any) disconnected before stopping and removing it;
 *  - resets the link through reset_link(udev, service);
 *  - for the AER service with a bridge reporter, clears the fatal and
 *    device error status of dev;
 *  - on PCI_ERS_RESULT_RECOVERED, waits for the link and rescans udev->bus,
 *    logging success; the DISCONNECT uevent and failure message that
 *    follow are presumably the else branch;
 *  - releases the rescan/remove lock.
 *
 * NOTE(review): several lines are missing from this listing (for example
 * numbering jumps 236 -> 239, 244 -> 247 and 251 -> 255), including the
 * bridge-case assignment of udev, the tail of the list iterator call and
 * any reference-count handling around pdev. Do not rely on this comment
 * for those details; confirm against a pristine copy.
 */
229 void pcie_do_fatal_recovery(struct pci_dev
*dev
, u32 service
)
231 struct pci_dev
*udev
;
232 struct pci_bus
*parent
;
233 struct pci_dev
*pdev
, *temp
;
234 pci_ers_result_t result
;
236 if (dev
->hdr_type
== PCI_HEADER_TYPE_BRIDGE
)
239 udev
= dev
->bus
->self
;
241 parent
= udev
->subordinate
;
242 pci_lock_rescan_remove();
244 list_for_each_entry_safe_reverse(pdev
, temp
, &parent
->devices
,
247 pci_dev_set_disconnected(pdev
, NULL
);
248 if (pci_has_subordinate(pdev
))
249 pci_walk_bus(pdev
->subordinate
,
250 pci_dev_set_disconnected
, NULL
);
251 pci_stop_and_remove_bus_device(pdev
);
255 result
= reset_link(udev
, service
);
257 if ((service
== PCIE_PORT_SERVICE_AER
) &&
258 (dev
->hdr_type
== PCI_HEADER_TYPE_BRIDGE
)) {
260 * If the error is reported by a bridge, we think this error
261 * is related to the downstream link of the bridge, so we
262 * do error recovery on all subordinates of the bridge instead
263 * of the bridge and clear the error status of the bridge.
265 pci_aer_clear_fatal_status(dev
);
266 pci_aer_clear_device_status(dev
);
269 if (result
== PCI_ERS_RESULT_RECOVERED
) {
270 if (pcie_wait_for_link(udev
, true))
271 pci_rescan_bus(udev
->bus
);
272 pci_info(dev
, "Device recovery from fatal error successful\n");
274 pci_uevent_ers(dev
, PCI_ERS_RESULT_DISCONNECT
);
275 pci_info(dev
, "Device recovery from fatal error failed\n");
279 pci_unlock_rescan_remove();
/**
 * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
 * @dev: pointer to a pci_dev data structure of agent detecting an error
 *
 * Invoked when an error is nonfatal. Once invoked, broadcasts the
 * error-detected message to all downstream drivers within the hierarchy
 * in question and continues recovery according to the result they report.
 */
/*
 * Non-fatal error recovery entry point.
 * dev: device that detected/reported the error
 *
 * Sequence visible here:
 *  - uses pci_channel_io_normal as the channel state;
 *  - if dev is neither a Root Port nor a Downstream Port, moves up to
 *    dev->bus->self so the broadcast covers everything below that port;
 *  - broadcasts report_error_detected;
 *  - if the merged status is PCI_ERS_RESULT_CAN_RECOVER, broadcasts
 *    report_mmio_enabled;
 *  - if the status is PCI_ERS_RESULT_NEED_RESET, broadcasts a further
 *    callback (presumably report_slot_reset; its argument lines are not
 *    visible);
 *  - checks the status against PCI_ERS_RESULT_RECOVERED; the statement this
 *    check guards is not visible, and is presumably a jump to the failure
 *    path that sends the DISCONNECT uevent and logs the failure message;
 *  - broadcasts a final callback (presumably report_resume), then clears
 *    the device status and the AER uncorrectable error status of dev and
 *    logs the success message.
 *
 * NOTE(review): the state, message and callback arguments of most
 * broadcast_error_message calls, the failure label or branch, and the
 * function braces are missing from this listing (for example numbering
 * jumps 305 -> 308, 322 -> 328 and 331 -> 336). Confirm the control flow
 * against a pristine copy before relying on it.
 */
290 void pcie_do_nonfatal_recovery(struct pci_dev
*dev
)
292 pci_ers_result_t status
;
293 enum pci_channel_state state
;
295 state
= pci_channel_io_normal
;
298 * Error recovery runs on all subordinates of the first downstream port.
299 * If the downstream port detected the error, it is cleared at the end.
301 if (!(pci_pcie_type(dev
) == PCI_EXP_TYPE_ROOT_PORT
||
302 pci_pcie_type(dev
) == PCI_EXP_TYPE_DOWNSTREAM
))
303 dev
= dev
->bus
->self
;
305 status
= broadcast_error_message(dev
,
308 report_error_detected
);
310 if (status
== PCI_ERS_RESULT_CAN_RECOVER
)
311 status
= broadcast_error_message(dev
,
314 report_mmio_enabled
);
316 if (status
== PCI_ERS_RESULT_NEED_RESET
) {
318 * TODO: Should call platform-specific
319 * functions to reset slot before calling
320 * drivers' slot_reset callbacks?
322 status
= broadcast_error_message(dev
,
328 if (status
!= PCI_ERS_RESULT_RECOVERED
)
331 broadcast_error_message(dev
,
336 pci_aer_clear_device_status(dev
);
337 pci_cleanup_aer_uncorrect_error_status(dev
);
338 pci_info(dev
, "AER: Device recovery successful\n");
342 pci_uevent_ers(dev
, PCI_ERS_RESULT_DISCONNECT
);
344 /* TODO: Should kernel panic here? */
345 pci_info(dev
, "AER: Device recovery failed\n");