// SPDX-License-Identifier: GPL-2.0
/*
 * This file implements the error recovery as a core part of PCIe error
 * reporting. When a PCIe error is delivered, an error message will be
 * collected and printed to console, then, an error recovery procedure
 * will be executed by following the PCI error recovery rules.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 */
/*
 * Format prefix "AER: " for this file's log messages.  Defined ahead of the
 * includes, presumably so the device logging helpers pick it up.
 */
#define dev_fmt(fmt) "AER: " fmt
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/aer.h>
24 static pci_ers_result_t
merge_result(enum pci_ers_result orig
,
25 enum pci_ers_result
new)
27 if (new == PCI_ERS_RESULT_NO_AER_DRIVER
)
28 return PCI_ERS_RESULT_NO_AER_DRIVER
;
30 if (new == PCI_ERS_RESULT_NONE
)
34 case PCI_ERS_RESULT_CAN_RECOVER
:
35 case PCI_ERS_RESULT_RECOVERED
:
38 case PCI_ERS_RESULT_DISCONNECT
:
39 if (new == PCI_ERS_RESULT_NEED_RESET
)
40 orig
= PCI_ERS_RESULT_NEED_RESET
;
49 static int report_error_detected(struct pci_dev
*dev
,
50 pci_channel_state_t state
,
51 enum pci_ers_result
*result
)
53 struct pci_driver
*pdrv
;
54 pci_ers_result_t vote
;
55 const struct pci_error_handlers
*err_handler
;
57 device_lock(&dev
->dev
);
59 if (pci_dev_is_disconnected(dev
)) {
60 vote
= PCI_ERS_RESULT_DISCONNECT
;
61 } else if (!pci_dev_set_io_state(dev
, state
)) {
62 pci_info(dev
, "can't recover (state transition %u -> %u invalid)\n",
63 dev
->error_state
, state
);
64 vote
= PCI_ERS_RESULT_NONE
;
65 } else if (!pdrv
|| !pdrv
->err_handler
||
66 !pdrv
->err_handler
->error_detected
) {
68 * If any device in the subtree does not have an error_detected
69 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
70 * error callbacks of "any" device in the subtree, and will
71 * exit in the disconnected error state.
73 if (dev
->hdr_type
!= PCI_HEADER_TYPE_BRIDGE
) {
74 vote
= PCI_ERS_RESULT_NO_AER_DRIVER
;
75 pci_info(dev
, "can't recover (no error_detected callback)\n");
77 vote
= PCI_ERS_RESULT_NONE
;
80 err_handler
= pdrv
->err_handler
;
81 vote
= err_handler
->error_detected(dev
, state
);
83 pci_uevent_ers(dev
, vote
);
84 *result
= merge_result(*result
, vote
);
85 device_unlock(&dev
->dev
);
89 static int pci_pm_runtime_get_sync(struct pci_dev
*pdev
, void *data
)
91 pm_runtime_get_sync(&pdev
->dev
);
95 static int pci_pm_runtime_put(struct pci_dev
*pdev
, void *data
)
97 pm_runtime_put(&pdev
->dev
);
101 static int report_frozen_detected(struct pci_dev
*dev
, void *data
)
103 return report_error_detected(dev
, pci_channel_io_frozen
, data
);
106 static int report_normal_detected(struct pci_dev
*dev
, void *data
)
108 return report_error_detected(dev
, pci_channel_io_normal
, data
);
111 static int report_mmio_enabled(struct pci_dev
*dev
, void *data
)
113 struct pci_driver
*pdrv
;
114 pci_ers_result_t vote
, *result
= data
;
115 const struct pci_error_handlers
*err_handler
;
117 device_lock(&dev
->dev
);
119 if (!pdrv
|| !pdrv
->err_handler
|| !pdrv
->err_handler
->mmio_enabled
)
122 err_handler
= pdrv
->err_handler
;
123 vote
= err_handler
->mmio_enabled(dev
);
124 *result
= merge_result(*result
, vote
);
126 device_unlock(&dev
->dev
);
130 static int report_slot_reset(struct pci_dev
*dev
, void *data
)
132 struct pci_driver
*pdrv
;
133 pci_ers_result_t vote
, *result
= data
;
134 const struct pci_error_handlers
*err_handler
;
136 device_lock(&dev
->dev
);
138 if (!pdrv
|| !pdrv
->err_handler
|| !pdrv
->err_handler
->slot_reset
)
141 err_handler
= pdrv
->err_handler
;
142 vote
= err_handler
->slot_reset(dev
);
143 *result
= merge_result(*result
, vote
);
145 device_unlock(&dev
->dev
);
149 static int report_resume(struct pci_dev
*dev
, void *data
)
151 struct pci_driver
*pdrv
;
152 const struct pci_error_handlers
*err_handler
;
154 device_lock(&dev
->dev
);
156 if (!pci_dev_set_io_state(dev
, pci_channel_io_normal
) ||
157 !pdrv
|| !pdrv
->err_handler
|| !pdrv
->err_handler
->resume
)
160 err_handler
= pdrv
->err_handler
;
161 err_handler
->resume(dev
);
163 pci_uevent_ers(dev
, PCI_ERS_RESULT_RECOVERED
);
164 device_unlock(&dev
->dev
);
169 * pci_walk_bridge - walk bridges potentially AER affected
170 * @bridge: bridge which may be a Port, an RCEC, or an RCiEP
171 * @cb: callback to be called for each device found
172 * @userdata: arbitrary pointer to be passed to callback
174 * If the device provided is a bridge, walk the subordinate bus, including
175 * any bridged devices on buses under this bus. Call the provided callback
176 * on each device found.
178 * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP,
179 * call the callback on the device itself.
181 static void pci_walk_bridge(struct pci_dev
*bridge
,
182 int (*cb
)(struct pci_dev
*, void *),
185 if (bridge
->subordinate
)
186 pci_walk_bus(bridge
->subordinate
, cb
, userdata
);
188 cb(bridge
, userdata
);
191 pci_ers_result_t
pcie_do_recovery(struct pci_dev
*dev
,
192 pci_channel_state_t state
,
193 pci_ers_result_t (*reset_subordinates
)(struct pci_dev
*pdev
))
195 int type
= pci_pcie_type(dev
);
196 struct pci_dev
*bridge
;
197 pci_ers_result_t status
= PCI_ERS_RESULT_CAN_RECOVER
;
198 struct pci_host_bridge
*host
= pci_find_host_bridge(dev
->bus
);
201 * If the error was detected by a Root Port, Downstream Port, RCEC,
202 * or RCiEP, recovery runs on the device itself. For Ports, that
203 * also includes any subordinate devices.
205 * If it was detected by another device (Endpoint, etc), recovery
206 * runs on the device and anything else under the same Port, i.e.,
207 * everything under "bridge".
209 if (type
== PCI_EXP_TYPE_ROOT_PORT
||
210 type
== PCI_EXP_TYPE_DOWNSTREAM
||
211 type
== PCI_EXP_TYPE_RC_EC
||
212 type
== PCI_EXP_TYPE_RC_END
)
215 bridge
= pci_upstream_bridge(dev
);
217 pci_walk_bridge(bridge
, pci_pm_runtime_get_sync
, NULL
);
219 pci_dbg(bridge
, "broadcast error_detected message\n");
220 if (state
== pci_channel_io_frozen
) {
221 pci_walk_bridge(bridge
, report_frozen_detected
, &status
);
222 if (reset_subordinates(bridge
) != PCI_ERS_RESULT_RECOVERED
) {
223 pci_warn(bridge
, "subordinate device reset failed\n");
227 pci_walk_bridge(bridge
, report_normal_detected
, &status
);
230 if (status
== PCI_ERS_RESULT_CAN_RECOVER
) {
231 status
= PCI_ERS_RESULT_RECOVERED
;
232 pci_dbg(bridge
, "broadcast mmio_enabled message\n");
233 pci_walk_bridge(bridge
, report_mmio_enabled
, &status
);
236 if (status
== PCI_ERS_RESULT_NEED_RESET
) {
238 * TODO: Should call platform-specific
239 * functions to reset slot before calling
240 * drivers' slot_reset callbacks?
242 status
= PCI_ERS_RESULT_RECOVERED
;
243 pci_dbg(bridge
, "broadcast slot_reset message\n");
244 pci_walk_bridge(bridge
, report_slot_reset
, &status
);
247 if (status
!= PCI_ERS_RESULT_RECOVERED
)
250 pci_dbg(bridge
, "broadcast resume message\n");
251 pci_walk_bridge(bridge
, report_resume
, &status
);
254 * If we have native control of AER, clear error status in the device
255 * that detected the error. If the platform retained control of AER,
256 * it is responsible for clearing this status. In that case, the
257 * signaling device may not even be visible to the OS.
259 if (host
->native_aer
|| pcie_ports_native
) {
260 pcie_clear_device_status(dev
);
261 pci_aer_clear_nonfatal_status(dev
);
264 pci_walk_bridge(bridge
, pci_pm_runtime_put
, NULL
);
266 pci_info(bridge
, "device recovery successful\n");
270 pci_walk_bridge(bridge
, pci_pm_runtime_put
, NULL
);
272 pci_uevent_ers(bridge
, PCI_ERS_RESULT_DISCONNECT
);
274 /* TODO: Should kernel panic here? */
275 pci_info(bridge
, "device recovery failed\n");