1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * PowerNV Platform dependent EEH operations
5 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
8 #include <linux/atomic.h>
9 #include <linux/debugfs.h>
10 #include <linux/delay.h>
11 #include <linux/export.h>
12 #include <linux/init.h>
13 #include <linux/interrupt.h>
14 #include <linux/list.h>
15 #include <linux/msi.h>
17 #include <linux/pci.h>
18 #include <linux/proc_fs.h>
19 #include <linux/rbtree.h>
20 #include <linux/sched.h>
21 #include <linux/seq_file.h>
22 #include <linux/spinlock.h>
25 #include <asm/eeh_event.h>
26 #include <asm/firmware.h>
28 #include <asm/iommu.h>
29 #include <asm/machdep.h>
30 #include <asm/msi_bitmap.h>
32 #include <asm/ppc-pci.h>
33 #include <asm/pnv-pci.h>
37 #include "../../../../drivers/pci/pci.h"
39 static int eeh_event_irq
= -EINVAL
;
41 static void pnv_pcibios_bus_add_device(struct pci_dev
*pdev
)
43 dev_dbg(&pdev
->dev
, "EEH: Setting up device\n");
44 eeh_probe_device(pdev
);
47 static irqreturn_t
pnv_eeh_event(int irq
, void *data
)
50 * We simply send a special EEH event if EEH has been
51 * enabled. We don't care about EEH events until we've
52 * finished processing the outstanding ones. Event processing
53 * gets unmasked in next_error() if EEH is enabled.
55 disable_irq_nosync(irq
);
58 eeh_send_failure_event(NULL
);
63 #ifdef CONFIG_DEBUG_FS
64 static ssize_t
pnv_eeh_ei_write(struct file
*filp
,
65 const char __user
*user_buf
,
66 size_t count
, loff_t
*ppos
)
68 struct pci_controller
*hose
= filp
->private_data
;
70 int pe_no
, type
, func
;
71 unsigned long addr
, mask
;
75 if (!eeh_ops
|| !eeh_ops
->err_inject
)
78 /* Copy over argument buffer */
79 ret
= simple_write_to_buffer(buf
, sizeof(buf
), ppos
, user_buf
, count
);
83 /* Retrieve parameters */
84 ret
= sscanf(buf
, "%x:%x:%x:%lx:%lx",
85 &pe_no
, &type
, &func
, &addr
, &mask
);
90 pe
= eeh_pe_get(hose
, pe_no
);
94 /* Do error injection */
95 ret
= eeh_ops
->err_inject(pe
, type
, func
, addr
, mask
);
96 return ret
< 0 ? ret
: count
;
99 static const struct file_operations pnv_eeh_ei_fops
= {
102 .write
= pnv_eeh_ei_write
,
105 static int pnv_eeh_dbgfs_set(void *data
, int offset
, u64 val
)
107 struct pci_controller
*hose
= data
;
108 struct pnv_phb
*phb
= hose
->private_data
;
110 out_be64(phb
->regs
+ offset
, val
);
114 static int pnv_eeh_dbgfs_get(void *data
, int offset
, u64
*val
)
116 struct pci_controller
*hose
= data
;
117 struct pnv_phb
*phb
= hose
->private_data
;
119 *val
= in_be64(phb
->regs
+ offset
);
123 #define PNV_EEH_DBGFS_ENTRY(name, reg) \
124 static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \
126 return pnv_eeh_dbgfs_set(data, reg, val); \
129 static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \
131 return pnv_eeh_dbgfs_get(data, reg, val); \
134 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \
135 pnv_eeh_dbgfs_get_##name, \
136 pnv_eeh_dbgfs_set_##name, \
139 PNV_EEH_DBGFS_ENTRY(outb
, 0xD10);
140 PNV_EEH_DBGFS_ENTRY(inbA
, 0xD90);
141 PNV_EEH_DBGFS_ENTRY(inbB
, 0xE10);
143 #endif /* CONFIG_DEBUG_FS */
145 static void pnv_eeh_enable_phbs(void)
147 struct pci_controller
*hose
;
150 list_for_each_entry(hose
, &hose_list
, list_node
) {
151 phb
= hose
->private_data
;
153 * If EEH is enabled, we're going to rely on that.
154 * Otherwise, we restore to conventional mechanism
155 * to clear frozen PE during PCI config access.
158 phb
->flags
|= PNV_PHB_FLAG_EEH
;
160 phb
->flags
&= ~PNV_PHB_FLAG_EEH
;
165 * pnv_eeh_post_init - EEH platform dependent post initialization
167 * EEH platform dependent post initialization on powernv. When
168 * the function is called, the EEH PEs and devices should have
169 * been built. If the I/O cache staff has been built, EEH is
170 * ready to supply service.
172 int pnv_eeh_post_init(void)
174 struct pci_controller
*hose
;
180 /* Register OPAL event notifier */
181 eeh_event_irq
= opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR
));
182 if (eeh_event_irq
< 0) {
183 pr_err("%s: Can't register OPAL event interrupt (%d)\n",
184 __func__
, eeh_event_irq
);
185 return eeh_event_irq
;
188 ret
= request_irq(eeh_event_irq
, pnv_eeh_event
,
189 IRQ_TYPE_LEVEL_HIGH
, "opal-eeh", NULL
);
191 irq_dispose_mapping(eeh_event_irq
);
192 pr_err("%s: Can't request OPAL event interrupt (%d)\n",
193 __func__
, eeh_event_irq
);
198 disable_irq(eeh_event_irq
);
200 pnv_eeh_enable_phbs();
202 list_for_each_entry(hose
, &hose_list
, list_node
) {
203 phb
= hose
->private_data
;
205 /* Create debugfs entries */
206 #ifdef CONFIG_DEBUG_FS
207 if (phb
->has_dbgfs
|| !phb
->dbgfs
)
211 debugfs_create_file("err_injct", 0200,
215 debugfs_create_file("err_injct_outbound", 0600,
217 &pnv_eeh_dbgfs_ops_outb
);
218 debugfs_create_file("err_injct_inboundA", 0600,
220 &pnv_eeh_dbgfs_ops_inbA
);
221 debugfs_create_file("err_injct_inboundB", 0600,
223 &pnv_eeh_dbgfs_ops_inbB
);
224 #endif /* CONFIG_DEBUG_FS */
230 static int pnv_eeh_find_cap(struct pci_dn
*pdn
, int cap
)
232 int pos
= PCI_CAPABILITY_LIST
;
233 int cnt
= 48; /* Maximal number of capabilities */
239 /* Check if the device supports capabilities */
240 pnv_pci_cfg_read(pdn
, PCI_STATUS
, 2, &status
);
241 if (!(status
& PCI_STATUS_CAP_LIST
))
245 pnv_pci_cfg_read(pdn
, pos
, 1, &pos
);
250 pnv_pci_cfg_read(pdn
, pos
+ PCI_CAP_LIST_ID
, 1, &id
);
259 pos
+= PCI_CAP_LIST_NEXT
;
265 static int pnv_eeh_find_ecap(struct pci_dn
*pdn
, int cap
)
267 struct eeh_dev
*edev
= pdn_to_eeh_dev(pdn
);
269 int pos
= 256, ttl
= (4096 - 256) / 8;
271 if (!edev
|| !edev
->pcie_cap
)
273 if (pnv_pci_cfg_read(pdn
, pos
, 4, &header
) != PCIBIOS_SUCCESSFUL
)
279 if (PCI_EXT_CAP_ID(header
) == cap
&& pos
)
282 pos
= PCI_EXT_CAP_NEXT(header
);
286 if (pnv_pci_cfg_read(pdn
, pos
, 4, &header
) != PCIBIOS_SUCCESSFUL
)
293 static struct eeh_pe
*pnv_eeh_get_upstream_pe(struct pci_dev
*pdev
)
295 struct pci_controller
*hose
= pdev
->bus
->sysdata
;
296 struct pnv_phb
*phb
= hose
->private_data
;
297 struct pci_dev
*parent
= pdev
->bus
->self
;
299 #ifdef CONFIG_PCI_IOV
300 /* for VFs we use the PF's PE as the upstream PE */
302 parent
= pdev
->physfn
;
305 /* otherwise use the PE of our parent bridge */
307 struct pnv_ioda_pe
*ioda_pe
= pnv_ioda_get_pe(parent
);
309 return eeh_pe_get(phb
->hose
, ioda_pe
->pe_number
);
316 * pnv_eeh_probe - Do probe on PCI device
317 * @pdev: pci_dev to probe
319 * Create, or find the existing, eeh_dev for this pci_dev.
321 static struct eeh_dev
*pnv_eeh_probe(struct pci_dev
*pdev
)
323 struct pci_dn
*pdn
= pci_get_pdn(pdev
);
324 struct pci_controller
*hose
= pdn
->phb
;
325 struct pnv_phb
*phb
= hose
->private_data
;
326 struct eeh_dev
*edev
= pdn_to_eeh_dev(pdn
);
327 struct eeh_pe
*upstream_pe
;
330 int config_addr
= (pdn
->busno
<< 8) | (pdn
->devfn
);
333 * When probing the root bridge, which doesn't have any
334 * subordinate PCI devices. We don't have OF node for
335 * the root bridge. So it's not reasonable to continue
338 if (!edev
|| edev
->pe
)
341 /* already configured? */
343 pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n",
344 __func__
, hose
->global_number
, config_addr
>> 8,
345 PCI_SLOT(config_addr
), PCI_FUNC(config_addr
));
349 /* Skip for PCI-ISA bridge */
350 if ((pdev
->class >> 8) == PCI_CLASS_BRIDGE_ISA
)
353 eeh_edev_dbg(edev
, "Probing device\n");
355 /* Initialize eeh device */
356 edev
->mode
&= 0xFFFFFF00;
357 edev
->pcix_cap
= pnv_eeh_find_cap(pdn
, PCI_CAP_ID_PCIX
);
358 edev
->pcie_cap
= pnv_eeh_find_cap(pdn
, PCI_CAP_ID_EXP
);
359 edev
->af_cap
= pnv_eeh_find_cap(pdn
, PCI_CAP_ID_AF
);
360 edev
->aer_cap
= pnv_eeh_find_ecap(pdn
, PCI_EXT_CAP_ID_ERR
);
361 if ((pdev
->class >> 8) == PCI_CLASS_BRIDGE_PCI
) {
362 edev
->mode
|= EEH_DEV_BRIDGE
;
363 if (edev
->pcie_cap
) {
364 pnv_pci_cfg_read(pdn
, edev
->pcie_cap
+ PCI_EXP_FLAGS
,
366 pcie_flags
= (pcie_flags
& PCI_EXP_FLAGS_TYPE
) >> 4;
367 if (pcie_flags
== PCI_EXP_TYPE_ROOT_PORT
)
368 edev
->mode
|= EEH_DEV_ROOT_PORT
;
369 else if (pcie_flags
== PCI_EXP_TYPE_DOWNSTREAM
)
370 edev
->mode
|= EEH_DEV_DS_PORT
;
374 edev
->pe_config_addr
= phb
->ioda
.pe_rmap
[config_addr
];
376 upstream_pe
= pnv_eeh_get_upstream_pe(pdev
);
379 ret
= eeh_pe_tree_insert(edev
, upstream_pe
);
381 eeh_edev_warn(edev
, "Failed to add device to PE (code %d)\n", ret
);
386 * If the PE contains any one of following adapters, the
387 * PCI config space can't be accessed when dumping EEH log.
388 * Otherwise, we will run into fenced PHB caused by shortage
389 * of outbound credits in the adapter. The PCI config access
390 * should be blocked until PE reset. MMIO access is dropped
391 * by hardware certainly. In order to drop PCI config requests,
392 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
393 * will be checked in the backend for PE state retrival. If
394 * the PE becomes frozen for the first time and the flag has
395 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
396 * that PE to block its config space.
398 * Broadcom BCM5718 2-ports NICs (14e4:1656)
399 * Broadcom Austin 4-ports NICs (14e4:1657)
400 * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
401 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
403 if ((pdn
->vendor_id
== PCI_VENDOR_ID_BROADCOM
&&
404 pdn
->device_id
== 0x1656) ||
405 (pdn
->vendor_id
== PCI_VENDOR_ID_BROADCOM
&&
406 pdn
->device_id
== 0x1657) ||
407 (pdn
->vendor_id
== PCI_VENDOR_ID_BROADCOM
&&
408 pdn
->device_id
== 0x168a) ||
409 (pdn
->vendor_id
== PCI_VENDOR_ID_BROADCOM
&&
410 pdn
->device_id
== 0x168e))
411 edev
->pe
->state
|= EEH_PE_CFG_RESTRICTED
;
414 * Cache the PE primary bus, which can't be fetched when
415 * full hotplug is in progress. In that case, all child
416 * PCI devices of the PE are expected to be removed prior
419 if (!(edev
->pe
->state
& EEH_PE_PRI_BUS
)) {
420 edev
->pe
->bus
= pci_find_bus(hose
->global_number
,
423 edev
->pe
->state
|= EEH_PE_PRI_BUS
;
427 * Enable EEH explicitly so that we will do EEH check
428 * while accessing I/O stuff
430 if (!eeh_has_flag(EEH_ENABLED
)) {
431 enable_irq(eeh_event_irq
);
432 pnv_eeh_enable_phbs();
433 eeh_add_flag(EEH_ENABLED
);
436 /* Save memory bars */
439 eeh_edev_dbg(edev
, "EEH enabled on device\n");
445 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
447 * @option: operation to be issued
449 * The function is used to control the EEH functionality globally.
450 * Currently, following options are support according to PAPR:
451 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
453 static int pnv_eeh_set_option(struct eeh_pe
*pe
, int option
)
455 struct pci_controller
*hose
= pe
->phb
;
456 struct pnv_phb
*phb
= hose
->private_data
;
457 bool freeze_pe
= false;
462 case EEH_OPT_DISABLE
:
466 case EEH_OPT_THAW_MMIO
:
467 opt
= OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO
;
469 case EEH_OPT_THAW_DMA
:
470 opt
= OPAL_EEH_ACTION_CLEAR_FREEZE_DMA
;
472 case EEH_OPT_FREEZE_PE
:
474 opt
= OPAL_EEH_ACTION_SET_FREEZE_ALL
;
477 pr_warn("%s: Invalid option %d\n", __func__
, option
);
481 /* Freeze master and slave PEs if PHB supports compound PEs */
483 if (phb
->freeze_pe
) {
484 phb
->freeze_pe(phb
, pe
->addr
);
488 rc
= opal_pci_eeh_freeze_set(phb
->opal_id
, pe
->addr
, opt
);
489 if (rc
!= OPAL_SUCCESS
) {
490 pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
491 __func__
, rc
, phb
->hose
->global_number
,
499 /* Unfreeze master and slave PEs if PHB supports */
500 if (phb
->unfreeze_pe
)
501 return phb
->unfreeze_pe(phb
, pe
->addr
, opt
);
503 rc
= opal_pci_eeh_freeze_clear(phb
->opal_id
, pe
->addr
, opt
);
504 if (rc
!= OPAL_SUCCESS
) {
505 pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
506 __func__
, rc
, option
, phb
->hose
->global_number
,
514 static void pnv_eeh_get_phb_diag(struct eeh_pe
*pe
)
516 struct pnv_phb
*phb
= pe
->phb
->private_data
;
519 rc
= opal_pci_get_phb_diag_data2(phb
->opal_id
, pe
->data
,
520 phb
->diag_data_size
);
521 if (rc
!= OPAL_SUCCESS
)
522 pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
523 __func__
, rc
, pe
->phb
->global_number
);
526 static int pnv_eeh_get_phb_state(struct eeh_pe
*pe
)
528 struct pnv_phb
*phb
= pe
->phb
->private_data
;
534 rc
= opal_pci_eeh_freeze_status(phb
->opal_id
,
539 if (rc
!= OPAL_SUCCESS
) {
540 pr_warn("%s: Failure %lld getting PHB#%x state\n",
541 __func__
, rc
, phb
->hose
->global_number
);
542 return EEH_STATE_NOT_SUPPORT
;
546 * Check PHB state. If the PHB is frozen for the
547 * first time, to dump the PHB diag-data.
549 if (be16_to_cpu(pcierr
) != OPAL_EEH_PHB_ERROR
) {
550 result
= (EEH_STATE_MMIO_ACTIVE
|
551 EEH_STATE_DMA_ACTIVE
|
552 EEH_STATE_MMIO_ENABLED
|
553 EEH_STATE_DMA_ENABLED
);
554 } else if (!(pe
->state
& EEH_PE_ISOLATED
)) {
555 eeh_pe_mark_isolated(pe
);
556 pnv_eeh_get_phb_diag(pe
);
558 if (eeh_has_flag(EEH_EARLY_DUMP_LOG
))
559 pnv_pci_dump_phb_diag_data(pe
->phb
, pe
->data
);
565 static int pnv_eeh_get_pe_state(struct eeh_pe
*pe
)
567 struct pnv_phb
*phb
= pe
->phb
->private_data
;
574 * We don't clobber hardware frozen state until PE
575 * reset is completed. In order to keep EEH core
576 * moving forward, we have to return operational
577 * state during PE reset.
579 if (pe
->state
& EEH_PE_RESET
) {
580 result
= (EEH_STATE_MMIO_ACTIVE
|
581 EEH_STATE_DMA_ACTIVE
|
582 EEH_STATE_MMIO_ENABLED
|
583 EEH_STATE_DMA_ENABLED
);
588 * Fetch PE state from hardware. If the PHB
589 * supports compound PE, let it handle that.
591 if (phb
->get_pe_state
) {
592 fstate
= phb
->get_pe_state(phb
, pe
->addr
);
594 rc
= opal_pci_eeh_freeze_status(phb
->opal_id
,
599 if (rc
!= OPAL_SUCCESS
) {
600 pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
601 __func__
, rc
, phb
->hose
->global_number
,
603 return EEH_STATE_NOT_SUPPORT
;
607 /* Figure out state */
609 case OPAL_EEH_STOPPED_NOT_FROZEN
:
610 result
= (EEH_STATE_MMIO_ACTIVE
|
611 EEH_STATE_DMA_ACTIVE
|
612 EEH_STATE_MMIO_ENABLED
|
613 EEH_STATE_DMA_ENABLED
);
615 case OPAL_EEH_STOPPED_MMIO_FREEZE
:
616 result
= (EEH_STATE_DMA_ACTIVE
|
617 EEH_STATE_DMA_ENABLED
);
619 case OPAL_EEH_STOPPED_DMA_FREEZE
:
620 result
= (EEH_STATE_MMIO_ACTIVE
|
621 EEH_STATE_MMIO_ENABLED
);
623 case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE
:
626 case OPAL_EEH_STOPPED_RESET
:
627 result
= EEH_STATE_RESET_ACTIVE
;
629 case OPAL_EEH_STOPPED_TEMP_UNAVAIL
:
630 result
= EEH_STATE_UNAVAILABLE
;
632 case OPAL_EEH_STOPPED_PERM_UNAVAIL
:
633 result
= EEH_STATE_NOT_SUPPORT
;
636 result
= EEH_STATE_NOT_SUPPORT
;
637 pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
638 __func__
, phb
->hose
->global_number
,
643 * If PHB supports compound PE, to freeze all
644 * slave PEs for consistency.
646 * If the PE is switching to frozen state for the
647 * first time, to dump the PHB diag-data.
649 if (!(result
& EEH_STATE_NOT_SUPPORT
) &&
650 !(result
& EEH_STATE_UNAVAILABLE
) &&
651 !(result
& EEH_STATE_MMIO_ACTIVE
) &&
652 !(result
& EEH_STATE_DMA_ACTIVE
) &&
653 !(pe
->state
& EEH_PE_ISOLATED
)) {
655 phb
->freeze_pe(phb
, pe
->addr
);
657 eeh_pe_mark_isolated(pe
);
658 pnv_eeh_get_phb_diag(pe
);
660 if (eeh_has_flag(EEH_EARLY_DUMP_LOG
))
661 pnv_pci_dump_phb_diag_data(pe
->phb
, pe
->data
);
668 * pnv_eeh_get_state - Retrieve PE state
670 * @delay: delay while PE state is temporarily unavailable
672 * Retrieve the state of the specified PE. For IODA-compitable
673 * platform, it should be retrieved from IODA table. Therefore,
674 * we prefer passing down to hardware implementation to handle
677 static int pnv_eeh_get_state(struct eeh_pe
*pe
, int *delay
)
681 if (pe
->type
& EEH_PE_PHB
)
682 ret
= pnv_eeh_get_phb_state(pe
);
684 ret
= pnv_eeh_get_pe_state(pe
);
690 * If the PE state is temporarily unavailable,
691 * to inform the EEH core delay for default
695 if (ret
& EEH_STATE_UNAVAILABLE
)
701 static s64
pnv_eeh_poll(unsigned long id
)
703 s64 rc
= OPAL_HARDWARE
;
706 rc
= opal_pci_poll(id
);
710 if (system_state
< SYSTEM_RUNNING
)
719 int pnv_eeh_phb_reset(struct pci_controller
*hose
, int option
)
721 struct pnv_phb
*phb
= hose
->private_data
;
722 s64 rc
= OPAL_HARDWARE
;
724 pr_debug("%s: Reset PHB#%x, option=%d\n",
725 __func__
, hose
->global_number
, option
);
727 /* Issue PHB complete reset request */
728 if (option
== EEH_RESET_FUNDAMENTAL
||
729 option
== EEH_RESET_HOT
)
730 rc
= opal_pci_reset(phb
->opal_id
,
731 OPAL_RESET_PHB_COMPLETE
,
733 else if (option
== EEH_RESET_DEACTIVATE
)
734 rc
= opal_pci_reset(phb
->opal_id
,
735 OPAL_RESET_PHB_COMPLETE
,
736 OPAL_DEASSERT_RESET
);
741 * Poll state of the PHB until the request is done
742 * successfully. The PHB reset is usually PHB complete
743 * reset followed by hot reset on root bus. So we also
744 * need the PCI bus settlement delay.
747 rc
= pnv_eeh_poll(phb
->opal_id
);
748 if (option
== EEH_RESET_DEACTIVATE
) {
749 if (system_state
< SYSTEM_RUNNING
)
750 udelay(1000 * EEH_PE_RST_SETTLE_TIME
);
752 msleep(EEH_PE_RST_SETTLE_TIME
);
755 if (rc
!= OPAL_SUCCESS
)
761 static int pnv_eeh_root_reset(struct pci_controller
*hose
, int option
)
763 struct pnv_phb
*phb
= hose
->private_data
;
764 s64 rc
= OPAL_HARDWARE
;
766 pr_debug("%s: Reset PHB#%x, option=%d\n",
767 __func__
, hose
->global_number
, option
);
770 * During the reset deassert time, we needn't care
771 * the reset scope because the firmware does nothing
772 * for fundamental or hot reset during deassert phase.
774 if (option
== EEH_RESET_FUNDAMENTAL
)
775 rc
= opal_pci_reset(phb
->opal_id
,
776 OPAL_RESET_PCI_FUNDAMENTAL
,
778 else if (option
== EEH_RESET_HOT
)
779 rc
= opal_pci_reset(phb
->opal_id
,
782 else if (option
== EEH_RESET_DEACTIVATE
)
783 rc
= opal_pci_reset(phb
->opal_id
,
785 OPAL_DEASSERT_RESET
);
789 /* Poll state of the PHB until the request is done */
791 rc
= pnv_eeh_poll(phb
->opal_id
);
792 if (option
== EEH_RESET_DEACTIVATE
)
793 msleep(EEH_PE_RST_SETTLE_TIME
);
795 if (rc
!= OPAL_SUCCESS
)
801 static int __pnv_eeh_bridge_reset(struct pci_dev
*dev
, int option
)
803 struct pci_dn
*pdn
= pci_get_pdn_by_devfn(dev
->bus
, dev
->devfn
);
804 struct eeh_dev
*edev
= pdn_to_eeh_dev(pdn
);
805 int aer
= edev
? edev
->aer_cap
: 0;
808 pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",
809 __func__
, pci_domain_nr(dev
->bus
),
810 dev
->bus
->number
, option
);
813 case EEH_RESET_FUNDAMENTAL
:
815 /* Don't report linkDown event */
817 eeh_ops
->read_config(edev
, aer
+ PCI_ERR_UNCOR_MASK
,
819 ctrl
|= PCI_ERR_UNC_SURPDN
;
820 eeh_ops
->write_config(edev
, aer
+ PCI_ERR_UNCOR_MASK
,
824 eeh_ops
->read_config(edev
, PCI_BRIDGE_CONTROL
, 2, &ctrl
);
825 ctrl
|= PCI_BRIDGE_CTL_BUS_RESET
;
826 eeh_ops
->write_config(edev
, PCI_BRIDGE_CONTROL
, 2, ctrl
);
828 msleep(EEH_PE_RST_HOLD_TIME
);
830 case EEH_RESET_DEACTIVATE
:
831 eeh_ops
->read_config(edev
, PCI_BRIDGE_CONTROL
, 2, &ctrl
);
832 ctrl
&= ~PCI_BRIDGE_CTL_BUS_RESET
;
833 eeh_ops
->write_config(edev
, PCI_BRIDGE_CONTROL
, 2, ctrl
);
835 msleep(EEH_PE_RST_SETTLE_TIME
);
837 /* Continue reporting linkDown event */
839 eeh_ops
->read_config(edev
, aer
+ PCI_ERR_UNCOR_MASK
,
841 ctrl
&= ~PCI_ERR_UNC_SURPDN
;
842 eeh_ops
->write_config(edev
, aer
+ PCI_ERR_UNCOR_MASK
,
852 static int pnv_eeh_bridge_reset(struct pci_dev
*pdev
, int option
)
854 struct pci_controller
*hose
= pci_bus_to_host(pdev
->bus
);
855 struct pnv_phb
*phb
= hose
->private_data
;
856 struct device_node
*dn
= pci_device_to_OF_node(pdev
);
857 uint64_t id
= PCI_SLOT_ID(phb
->opal_id
,
858 (pdev
->bus
->number
<< 8) | pdev
->devfn
);
862 /* Hot reset to the bus if firmware cannot handle */
863 if (!dn
|| !of_get_property(dn
, "ibm,reset-by-firmware", NULL
))
864 return __pnv_eeh_bridge_reset(pdev
, option
);
866 pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",
867 __func__
, pci_domain_nr(pdev
->bus
),
868 pdev
->bus
->number
, option
);
871 case EEH_RESET_FUNDAMENTAL
:
872 scope
= OPAL_RESET_PCI_FUNDAMENTAL
;
875 scope
= OPAL_RESET_PCI_HOT
;
877 case EEH_RESET_DEACTIVATE
:
880 dev_dbg(&pdev
->dev
, "%s: Unsupported reset %d\n",
885 rc
= opal_pci_reset(id
, scope
, OPAL_ASSERT_RESET
);
886 if (rc
<= OPAL_SUCCESS
)
889 rc
= pnv_eeh_poll(id
);
891 return (rc
== OPAL_SUCCESS
) ? 0 : -EIO
;
894 void pnv_pci_reset_secondary_bus(struct pci_dev
*dev
)
896 struct pci_controller
*hose
;
898 if (pci_is_root_bus(dev
->bus
)) {
899 hose
= pci_bus_to_host(dev
->bus
);
900 pnv_eeh_root_reset(hose
, EEH_RESET_HOT
);
901 pnv_eeh_root_reset(hose
, EEH_RESET_DEACTIVATE
);
903 pnv_eeh_bridge_reset(dev
, EEH_RESET_HOT
);
904 pnv_eeh_bridge_reset(dev
, EEH_RESET_DEACTIVATE
);
908 static void pnv_eeh_wait_for_pending(struct pci_dn
*pdn
, const char *type
,
911 struct eeh_dev
*edev
= pdn
->edev
;
914 /* Wait for Transaction Pending bit to be cleared */
915 for (i
= 0; i
< 4; i
++) {
916 eeh_ops
->read_config(edev
, pos
, 2, &status
);
917 if (!(status
& mask
))
920 msleep((1 << i
) * 100);
923 pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
925 pdn
->phb
->global_number
, pdn
->busno
,
926 PCI_SLOT(pdn
->devfn
), PCI_FUNC(pdn
->devfn
));
929 static int pnv_eeh_do_flr(struct pci_dn
*pdn
, int option
)
931 struct eeh_dev
*edev
= pdn_to_eeh_dev(pdn
);
934 if (WARN_ON(!edev
->pcie_cap
))
937 eeh_ops
->read_config(edev
, edev
->pcie_cap
+ PCI_EXP_DEVCAP
, 4, ®
);
938 if (!(reg
& PCI_EXP_DEVCAP_FLR
))
943 case EEH_RESET_FUNDAMENTAL
:
944 pnv_eeh_wait_for_pending(pdn
, "",
945 edev
->pcie_cap
+ PCI_EXP_DEVSTA
,
946 PCI_EXP_DEVSTA_TRPND
);
947 eeh_ops
->read_config(edev
, edev
->pcie_cap
+ PCI_EXP_DEVCTL
,
949 reg
|= PCI_EXP_DEVCTL_BCR_FLR
;
950 eeh_ops
->write_config(edev
, edev
->pcie_cap
+ PCI_EXP_DEVCTL
,
952 msleep(EEH_PE_RST_HOLD_TIME
);
954 case EEH_RESET_DEACTIVATE
:
955 eeh_ops
->read_config(edev
, edev
->pcie_cap
+ PCI_EXP_DEVCTL
,
957 reg
&= ~PCI_EXP_DEVCTL_BCR_FLR
;
958 eeh_ops
->write_config(edev
, edev
->pcie_cap
+ PCI_EXP_DEVCTL
,
960 msleep(EEH_PE_RST_SETTLE_TIME
);
967 static int pnv_eeh_do_af_flr(struct pci_dn
*pdn
, int option
)
969 struct eeh_dev
*edev
= pdn_to_eeh_dev(pdn
);
972 if (WARN_ON(!edev
->af_cap
))
975 eeh_ops
->read_config(edev
, edev
->af_cap
+ PCI_AF_CAP
, 1, &cap
);
976 if (!(cap
& PCI_AF_CAP_TP
) || !(cap
& PCI_AF_CAP_FLR
))
981 case EEH_RESET_FUNDAMENTAL
:
983 * Wait for Transaction Pending bit to clear. A word-aligned
984 * test is used, so we use the conrol offset rather than status
985 * and shift the test bit to match.
987 pnv_eeh_wait_for_pending(pdn
, "AF",
988 edev
->af_cap
+ PCI_AF_CTRL
,
989 PCI_AF_STATUS_TP
<< 8);
990 eeh_ops
->write_config(edev
, edev
->af_cap
+ PCI_AF_CTRL
,
992 msleep(EEH_PE_RST_HOLD_TIME
);
994 case EEH_RESET_DEACTIVATE
:
995 eeh_ops
->write_config(edev
, edev
->af_cap
+ PCI_AF_CTRL
, 1, 0);
996 msleep(EEH_PE_RST_SETTLE_TIME
);
1003 static int pnv_eeh_reset_vf_pe(struct eeh_pe
*pe
, int option
)
1005 struct eeh_dev
*edev
;
1009 /* The VF PE should have only one child device */
1010 edev
= list_first_entry_or_null(&pe
->edevs
, struct eeh_dev
, entry
);
1011 pdn
= eeh_dev_to_pdn(edev
);
1015 ret
= pnv_eeh_do_flr(pdn
, option
);
1019 return pnv_eeh_do_af_flr(pdn
, option
);
1023 * pnv_eeh_reset - Reset the specified PE
1025 * @option: reset option
1027 * Do reset on the indicated PE. For PCI bus sensitive PE,
1028 * we need to reset the parent p2p bridge. The PHB has to
1029 * be reinitialized if the p2p bridge is root bridge. For
1030 * PCI device sensitive PE, we will try to reset the device
1031 * through FLR. For now, we don't have OPAL APIs to do HARD
1032 * reset yet, so all reset would be SOFT (HOT) reset.
1034 static int pnv_eeh_reset(struct eeh_pe
*pe
, int option
)
1036 struct pci_controller
*hose
= pe
->phb
;
1037 struct pnv_phb
*phb
;
1038 struct pci_bus
*bus
;
1042 * For PHB reset, we always have complete reset. For those PEs whose
1043 * primary bus derived from root complex (root bus) or root port
1044 * (usually bus#1), we apply hot or fundamental reset on the root port.
1045 * For other PEs, we always have hot reset on the PE primary bus.
1047 * Here, we have different design to pHyp, which always clear the
1048 * frozen state during PE reset. However, the good idea here from
1049 * benh is to keep frozen state before we get PE reset done completely
1050 * (until BAR restore). With the frozen state, HW drops illegal IO
1051 * or MMIO access, which can incur recrusive frozen PE during PE
1052 * reset. The side effect is that EEH core has to clear the frozen
1053 * state explicitly after BAR restore.
1055 if (pe
->type
& EEH_PE_PHB
)
1056 return pnv_eeh_phb_reset(hose
, option
);
1059 * The frozen PE might be caused by PAPR error injection
1060 * registers, which are expected to be cleared after hitting
1061 * frozen PE as stated in the hardware spec. Unfortunately,
1062 * that's not true on P7IOC. So we have to clear it manually
1063 * to avoid recursive EEH errors during recovery.
1065 phb
= hose
->private_data
;
1066 if (phb
->model
== PNV_PHB_MODEL_P7IOC
&&
1067 (option
== EEH_RESET_HOT
||
1068 option
== EEH_RESET_FUNDAMENTAL
)) {
1069 rc
= opal_pci_reset(phb
->opal_id
,
1070 OPAL_RESET_PHB_ERROR
,
1072 if (rc
!= OPAL_SUCCESS
) {
1073 pr_warn("%s: Failure %lld clearing error injection registers\n",
1079 if (pe
->type
& EEH_PE_VF
)
1080 return pnv_eeh_reset_vf_pe(pe
, option
);
1082 bus
= eeh_pe_bus_get(pe
);
1084 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
1085 __func__
, pe
->phb
->global_number
, pe
->addr
);
1089 if (pci_is_root_bus(bus
))
1090 return pnv_eeh_root_reset(hose
, option
);
1093 * For hot resets try use the generic PCI error recovery reset
1094 * functions. These correctly handles the case where the secondary
1095 * bus is behind a hotplug slot and it will use the slot provided
1096 * reset methods to prevent spurious hotplug events during the reset.
1098 * Fundemental resets need to be handled internally to EEH since the
1099 * PCI core doesn't really have a concept of a fundemental reset,
1100 * mainly because there's no standard way to generate one. Only a
1101 * few devices require an FRESET so it should be fine.
1103 if (option
!= EEH_RESET_FUNDAMENTAL
) {
1105 * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the
1106 * de-assert step. It's like the OPAL reset API was
1107 * poorly designed or something...
1109 if (option
== EEH_RESET_DEACTIVATE
)
1112 rc
= pci_bus_error_reset(bus
->self
);
1117 /* otherwise, use the generic bridge reset. this might call into FW */
1118 if (pci_is_root_bus(bus
->parent
))
1119 return pnv_eeh_root_reset(hose
, option
);
1120 return pnv_eeh_bridge_reset(bus
->self
, option
);
1124 * pnv_eeh_get_log - Retrieve error log
1126 * @severity: temporary or permanent error log
1127 * @drv_log: driver log to be combined with retrieved error log
1128 * @len: length of driver log
1130 * Retrieve the temporary or permanent error from the PE.
1132 static int pnv_eeh_get_log(struct eeh_pe
*pe
, int severity
,
1133 char *drv_log
, unsigned long len
)
1135 if (!eeh_has_flag(EEH_EARLY_DUMP_LOG
))
1136 pnv_pci_dump_phb_diag_data(pe
->phb
, pe
->data
);
1142 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
1145 * The function will be called to reconfigure the bridges included
1146 * in the specified PE so that the mulfunctional PE would be recovered
static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
{
	/* Nothing to do: firmware reconfigures bridges on PowerNV. */
	return 0;
}
1155 * pnv_pe_err_inject - Inject specified error to the indicated PE
1156 * @pe: the indicated PE
1158 * @func: specific error type
1160 * @mask: address mask
1162 * The routine is called to inject specified error, which is
1163 * determined by @type and @func, to the indicated PE for
1166 static int pnv_eeh_err_inject(struct eeh_pe
*pe
, int type
, int func
,
1167 unsigned long addr
, unsigned long mask
)
1169 struct pci_controller
*hose
= pe
->phb
;
1170 struct pnv_phb
*phb
= hose
->private_data
;
1173 if (type
!= OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR
&&
1174 type
!= OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64
) {
1175 pr_warn("%s: Invalid error type %d\n",
1180 if (func
< OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR
||
1181 func
> OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET
) {
1182 pr_warn("%s: Invalid error function %d\n",
1187 /* Firmware supports error injection ? */
1188 if (!opal_check_token(OPAL_PCI_ERR_INJECT
)) {
1189 pr_warn("%s: Firmware doesn't support error injection\n",
1194 /* Do error injection */
1195 rc
= opal_pci_err_inject(phb
->opal_id
, pe
->addr
,
1196 type
, func
, addr
, mask
);
1197 if (rc
!= OPAL_SUCCESS
) {
1198 pr_warn("%s: Failure %lld injecting error "
1199 "%d-%d to PHB#%x-PE#%x\n",
1200 __func__
, rc
, type
, func
,
1201 hose
->global_number
, pe
->addr
);
1208 static inline bool pnv_eeh_cfg_blocked(struct pci_dn
*pdn
)
1210 struct eeh_dev
*edev
= pdn_to_eeh_dev(pdn
);
1212 if (!edev
|| !edev
->pe
)
1216 * We will issue FLR or AF FLR to all VFs, which are contained
1217 * in VF PE. It relies on the EEH PCI config accessors. So we
1218 * can't block them during the window.
1220 if (edev
->physfn
&& (edev
->pe
->state
& EEH_PE_RESET
))
1223 if (edev
->pe
->state
& EEH_PE_CFG_BLOCKED
)
1229 static int pnv_eeh_read_config(struct eeh_dev
*edev
,
1230 int where
, int size
, u32
*val
)
1232 struct pci_dn
*pdn
= eeh_dev_to_pdn(edev
);
1235 return PCIBIOS_DEVICE_NOT_FOUND
;
1237 if (pnv_eeh_cfg_blocked(pdn
)) {
1239 return PCIBIOS_SET_FAILED
;
1242 return pnv_pci_cfg_read(pdn
, where
, size
, val
);
1245 static int pnv_eeh_write_config(struct eeh_dev
*edev
,
1246 int where
, int size
, u32 val
)
1248 struct pci_dn
*pdn
= eeh_dev_to_pdn(edev
);
1251 return PCIBIOS_DEVICE_NOT_FOUND
;
1253 if (pnv_eeh_cfg_blocked(pdn
))
1254 return PCIBIOS_SET_FAILED
;
1256 return pnv_pci_cfg_write(pdn
, where
, size
, val
);
1259 static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData
*data
)
1262 if (data
->gemXfir
|| data
->gemRfir
||
1263 data
->gemRirqfir
|| data
->gemMask
|| data
->gemRwof
)
1264 pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n",
1265 be64_to_cpu(data
->gemXfir
),
1266 be64_to_cpu(data
->gemRfir
),
1267 be64_to_cpu(data
->gemRirqfir
),
1268 be64_to_cpu(data
->gemMask
),
1269 be64_to_cpu(data
->gemRwof
));
1272 if (data
->lemFir
|| data
->lemErrMask
||
1273 data
->lemAction0
|| data
->lemAction1
|| data
->lemWof
)
1274 pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n",
1275 be64_to_cpu(data
->lemFir
),
1276 be64_to_cpu(data
->lemErrMask
),
1277 be64_to_cpu(data
->lemAction0
),
1278 be64_to_cpu(data
->lemAction1
),
1279 be64_to_cpu(data
->lemWof
));
1282 static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller
*hose
)
1284 struct pnv_phb
*phb
= hose
->private_data
;
1285 struct OpalIoP7IOCErrorData
*data
=
1286 (struct OpalIoP7IOCErrorData
*)phb
->diag_data
;
1289 rc
= opal_pci_get_hub_diag_data(phb
->hub_id
, data
, sizeof(*data
));
1290 if (rc
!= OPAL_SUCCESS
) {
1291 pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
1292 __func__
, phb
->hub_id
, rc
);
1296 switch (be16_to_cpu(data
->type
)) {
1297 case OPAL_P7IOC_DIAG_TYPE_RGC
:
1298 pr_info("P7IOC diag-data for RGC\n\n");
1299 pnv_eeh_dump_hub_diag_common(data
);
1300 if (data
->rgc
.rgcStatus
|| data
->rgc
.rgcLdcp
)
1301 pr_info(" RGC: %016llx %016llx\n",
1302 be64_to_cpu(data
->rgc
.rgcStatus
),
1303 be64_to_cpu(data
->rgc
.rgcLdcp
));
1305 case OPAL_P7IOC_DIAG_TYPE_BI
:
1306 pr_info("P7IOC diag-data for BI %s\n\n",
1307 data
->bi
.biDownbound
? "Downbound" : "Upbound");
1308 pnv_eeh_dump_hub_diag_common(data
);
1309 if (data
->bi
.biLdcp0
|| data
->bi
.biLdcp1
||
1310 data
->bi
.biLdcp2
|| data
->bi
.biFenceStatus
)
1311 pr_info(" BI: %016llx %016llx %016llx %016llx\n",
1312 be64_to_cpu(data
->bi
.biLdcp0
),
1313 be64_to_cpu(data
->bi
.biLdcp1
),
1314 be64_to_cpu(data
->bi
.biLdcp2
),
1315 be64_to_cpu(data
->bi
.biFenceStatus
));
1317 case OPAL_P7IOC_DIAG_TYPE_CI
:
1318 pr_info("P7IOC diag-data for CI Port %d\n\n",
1320 pnv_eeh_dump_hub_diag_common(data
);
1321 if (data
->ci
.ciPortStatus
|| data
->ci
.ciPortLdcp
)
1322 pr_info(" CI: %016llx %016llx\n",
1323 be64_to_cpu(data
->ci
.ciPortStatus
),
1324 be64_to_cpu(data
->ci
.ciPortLdcp
));
1326 case OPAL_P7IOC_DIAG_TYPE_MISC
:
1327 pr_info("P7IOC diag-data for MISC\n\n");
1328 pnv_eeh_dump_hub_diag_common(data
);
1330 case OPAL_P7IOC_DIAG_TYPE_I2C
:
1331 pr_info("P7IOC diag-data for I2C\n\n");
1332 pnv_eeh_dump_hub_diag_common(data
);
1335 pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
1336 __func__
, phb
->hub_id
, data
->type
);
1340 static int pnv_eeh_get_pe(struct pci_controller
*hose
,
1341 u16 pe_no
, struct eeh_pe
**pe
)
1343 struct pnv_phb
*phb
= hose
->private_data
;
1344 struct pnv_ioda_pe
*pnv_pe
;
1345 struct eeh_pe
*dev_pe
;
1348 * If PHB supports compound PE, to fetch
1349 * the master PE because slave PE is invisible
1352 pnv_pe
= &phb
->ioda
.pe_array
[pe_no
];
1353 if (pnv_pe
->flags
& PNV_IODA_PE_SLAVE
) {
1354 pnv_pe
= pnv_pe
->master
;
1356 !(pnv_pe
->flags
& PNV_IODA_PE_MASTER
));
1357 pe_no
= pnv_pe
->pe_number
;
1360 /* Find the PE according to PE# */
1361 dev_pe
= eeh_pe_get(hose
, pe_no
);
1365 /* Freeze the (compound) PE */
1367 if (!(dev_pe
->state
& EEH_PE_ISOLATED
))
1368 phb
->freeze_pe(phb
, pe_no
);
1371 * At this point, we're sure the (compound) PE should
1372 * have been frozen. However, we still need poke until
1373 * hitting the frozen PE on top level.
1375 dev_pe
= dev_pe
->parent
;
1376 while (dev_pe
&& !(dev_pe
->type
& EEH_PE_PHB
)) {
1378 ret
= eeh_ops
->get_state(dev_pe
, NULL
);
1379 if (ret
<= 0 || eeh_state_active(ret
)) {
1380 dev_pe
= dev_pe
->parent
;
1384 /* Frozen parent PE */
1386 if (!(dev_pe
->state
& EEH_PE_ISOLATED
))
1387 phb
->freeze_pe(phb
, dev_pe
->addr
);
1390 dev_pe
= dev_pe
->parent
;
1397 * pnv_eeh_next_error - Retrieve next EEH error to handle
1400 * The function is expected to be called by EEH core while it gets
1401 * special EEH event (without binding PE). The function calls to
1402 * OPAL APIs for next error to handle. The informational error is
1403 * handled internally by platform. However, the dead IOC, dead PHB,
1404 * fenced PHB and frozen PE should be handled by EEH core eventually.
1406 static int pnv_eeh_next_error(struct eeh_pe
**pe
)
1408 struct pci_controller
*hose
;
1409 struct pnv_phb
*phb
;
1410 struct eeh_pe
*phb_pe
, *parent_pe
;
1411 __be64 frozen_pe_no
;
1412 __be16 err_type
, severity
;
1414 int state
, ret
= EEH_NEXT_ERR_NONE
;
1417 * While running here, it's safe to purge the event queue. The
1418 * event should still be masked.
1420 eeh_remove_event(NULL
, false);
1422 list_for_each_entry(hose
, &hose_list
, list_node
) {
1424 * If the subordinate PCI buses of the PHB has been
1425 * removed or is exactly under error recovery, we
1426 * needn't take care of it any more.
1428 phb
= hose
->private_data
;
1429 phb_pe
= eeh_phb_pe_get(hose
);
1430 if (!phb_pe
|| (phb_pe
->state
& EEH_PE_ISOLATED
))
1433 rc
= opal_pci_next_error(phb
->opal_id
,
1434 &frozen_pe_no
, &err_type
, &severity
);
1435 if (rc
!= OPAL_SUCCESS
) {
1436 pr_devel("%s: Invalid return value on "
1437 "PHB#%x (0x%lx) from opal_pci_next_error",
1438 __func__
, hose
->global_number
, rc
);
1442 /* If the PHB doesn't have error, stop processing */
1443 if (be16_to_cpu(err_type
) == OPAL_EEH_NO_ERROR
||
1444 be16_to_cpu(severity
) == OPAL_EEH_SEV_NO_ERROR
) {
1445 pr_devel("%s: No error found on PHB#%x\n",
1446 __func__
, hose
->global_number
);
1451 * Processing the error. We're expecting the error with
1452 * highest priority reported upon multiple errors on the
1455 pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
1456 __func__
, be16_to_cpu(err_type
),
1457 be16_to_cpu(severity
), be64_to_cpu(frozen_pe_no
),
1458 hose
->global_number
);
1459 switch (be16_to_cpu(err_type
)) {
1460 case OPAL_EEH_IOC_ERROR
:
1461 if (be16_to_cpu(severity
) == OPAL_EEH_SEV_IOC_DEAD
) {
1462 pr_err("EEH: dead IOC detected\n");
1463 ret
= EEH_NEXT_ERR_DEAD_IOC
;
1464 } else if (be16_to_cpu(severity
) == OPAL_EEH_SEV_INF
) {
1465 pr_info("EEH: IOC informative error "
1467 pnv_eeh_get_and_dump_hub_diag(hose
);
1468 ret
= EEH_NEXT_ERR_NONE
;
1472 case OPAL_EEH_PHB_ERROR
:
1473 if (be16_to_cpu(severity
) == OPAL_EEH_SEV_PHB_DEAD
) {
1475 pr_err("EEH: dead PHB#%x detected, "
1477 hose
->global_number
,
1478 eeh_pe_loc_get(phb_pe
));
1479 ret
= EEH_NEXT_ERR_DEAD_PHB
;
1480 } else if (be16_to_cpu(severity
) ==
1481 OPAL_EEH_SEV_PHB_FENCED
) {
1483 pr_err("EEH: Fenced PHB#%x detected, "
1485 hose
->global_number
,
1486 eeh_pe_loc_get(phb_pe
));
1487 ret
= EEH_NEXT_ERR_FENCED_PHB
;
1488 } else if (be16_to_cpu(severity
) == OPAL_EEH_SEV_INF
) {
1489 pr_info("EEH: PHB#%x informative error "
1490 "detected, location: %s\n",
1491 hose
->global_number
,
1492 eeh_pe_loc_get(phb_pe
));
1493 pnv_eeh_get_phb_diag(phb_pe
);
1494 pnv_pci_dump_phb_diag_data(hose
, phb_pe
->data
);
1495 ret
= EEH_NEXT_ERR_NONE
;
1499 case OPAL_EEH_PE_ERROR
:
1501 * If we can't find the corresponding PE, we
1502 * just try to unfreeze.
1504 if (pnv_eeh_get_pe(hose
,
1505 be64_to_cpu(frozen_pe_no
), pe
)) {
1506 pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
1507 hose
->global_number
, be64_to_cpu(frozen_pe_no
));
1508 pr_info("EEH: PHB location: %s\n",
1509 eeh_pe_loc_get(phb_pe
));
1511 /* Dump PHB diag-data */
1512 rc
= opal_pci_get_phb_diag_data2(phb
->opal_id
,
1513 phb
->diag_data
, phb
->diag_data_size
);
1514 if (rc
== OPAL_SUCCESS
)
1515 pnv_pci_dump_phb_diag_data(hose
,
1518 /* Try best to clear it */
1519 opal_pci_eeh_freeze_clear(phb
->opal_id
,
1520 be64_to_cpu(frozen_pe_no
),
1521 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL
);
1522 ret
= EEH_NEXT_ERR_NONE
;
1523 } else if ((*pe
)->state
& EEH_PE_ISOLATED
||
1524 eeh_pe_passed(*pe
)) {
1525 ret
= EEH_NEXT_ERR_NONE
;
1527 pr_err("EEH: Frozen PE#%x "
1528 "on PHB#%x detected\n",
1530 (*pe
)->phb
->global_number
);
1531 pr_err("EEH: PE location: %s, "
1532 "PHB location: %s\n",
1533 eeh_pe_loc_get(*pe
),
1534 eeh_pe_loc_get(phb_pe
));
1535 ret
= EEH_NEXT_ERR_FROZEN_PE
;
1540 pr_warn("%s: Unexpected error type %d\n",
1541 __func__
, be16_to_cpu(err_type
));
1545 * EEH core will try recover from fenced PHB or
1546 * frozen PE. In the time for frozen PE, EEH core
1547 * enable IO path for that before collecting logs,
1548 * but it ruins the site. So we have to dump the
1549 * log in advance here.
1551 if ((ret
== EEH_NEXT_ERR_FROZEN_PE
||
1552 ret
== EEH_NEXT_ERR_FENCED_PHB
) &&
1553 !((*pe
)->state
& EEH_PE_ISOLATED
)) {
1554 eeh_pe_mark_isolated(*pe
);
1555 pnv_eeh_get_phb_diag(*pe
);
1557 if (eeh_has_flag(EEH_EARLY_DUMP_LOG
))
1558 pnv_pci_dump_phb_diag_data((*pe
)->phb
,
1563 * We probably have the frozen parent PE out there and
1564 * we need have to handle frozen parent PE firstly.
1566 if (ret
== EEH_NEXT_ERR_FROZEN_PE
) {
1567 parent_pe
= (*pe
)->parent
;
1569 /* Hit the ceiling ? */
1570 if (parent_pe
->type
& EEH_PE_PHB
)
1573 /* Frozen parent PE ? */
1574 state
= eeh_ops
->get_state(parent_pe
, NULL
);
1575 if (state
> 0 && !eeh_state_active(state
))
1578 /* Next parent level */
1579 parent_pe
= parent_pe
->parent
;
1582 /* We possibly migrate to another PE */
1583 eeh_pe_mark_isolated(*pe
);
1587 * If we have no errors on the specific PHB or only
1588 * informative error there, we continue poking it.
1589 * Otherwise, we need actions to be taken by upper
1592 if (ret
> EEH_NEXT_ERR_INF
)
1596 /* Unmask the event */
1597 if (ret
== EEH_NEXT_ERR_NONE
&& eeh_enabled())
1598 enable_irq(eeh_event_irq
);
1603 static int pnv_eeh_restore_config(struct eeh_dev
*edev
)
1605 struct pnv_phb
*phb
;
1614 phb
= edev
->controller
->private_data
;
1615 ret
= opal_pci_reinit(phb
->opal_id
,
1616 OPAL_REINIT_PCI_DEV
, edev
->bdfn
);
1619 pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
1620 __func__
, edev
->bdfn
, ret
);
1627 static struct eeh_ops pnv_eeh_ops
= {
1629 .probe
= pnv_eeh_probe
,
1630 .set_option
= pnv_eeh_set_option
,
1631 .get_state
= pnv_eeh_get_state
,
1632 .reset
= pnv_eeh_reset
,
1633 .get_log
= pnv_eeh_get_log
,
1634 .configure_bridge
= pnv_eeh_configure_bridge
,
1635 .err_inject
= pnv_eeh_err_inject
,
1636 .read_config
= pnv_eeh_read_config
,
1637 .write_config
= pnv_eeh_write_config
,
1638 .next_error
= pnv_eeh_next_error
,
1639 .restore_config
= pnv_eeh_restore_config
,
1640 .notify_resume
= NULL
#ifdef CONFIG_PCI_IOV
/*
 * Keep a VF's Max Payload Size in sync with its parent PF and cache
 * the resulting value in the device's pci_dn.
 */
static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	int parent_mps;

	if (!pdev->is_virtfn)
		return;

	/*
	 * Fix: pci_get_pdn() may return NULL; the original dereferenced
	 * pdn->mps unconditionally below.
	 */
	if (!pdn)
		return;

	/* Synchronize MPS for VF and PF */
	parent_mps = pcie_get_mps(pdev->physfn);
	if ((128 << pdev->pcie_mpss) >= parent_mps)
		pcie_set_mps(pdev, parent_mps);
	pdn->mps = pcie_get_mps(pdev);
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
#endif /* CONFIG_PCI_IOV */
1662 * eeh_powernv_init - Register platform dependent EEH operations
1664 * EEH initialization on powernv platform. This function should be
1665 * called before any EEH related functions.
1667 static int __init
eeh_powernv_init(void)
1669 int max_diag_size
= PNV_PCI_DIAG_BUF_SIZE
;
1670 struct pci_controller
*hose
;
1671 struct pnv_phb
*phb
;
1674 if (!firmware_has_feature(FW_FEATURE_OPAL
)) {
1675 pr_warn("%s: OPAL is required !\n", __func__
);
1679 /* Set probe mode */
1680 eeh_add_flag(EEH_PROBE_MODE_DEV
);
1683 * P7IOC blocks PCI config access to frozen PE, but PHB3
1684 * doesn't do that. So we have to selectively enable I/O
1685 * prior to collecting error log.
1687 list_for_each_entry(hose
, &hose_list
, list_node
) {
1688 phb
= hose
->private_data
;
1690 if (phb
->model
== PNV_PHB_MODEL_P7IOC
)
1691 eeh_add_flag(EEH_ENABLE_IO_FOR_LOG
);
1693 if (phb
->diag_data_size
> max_diag_size
)
1694 max_diag_size
= phb
->diag_data_size
;
1700 * eeh_init() allocates the eeh_pe and its aux data buf so the
1701 * size needs to be set before calling eeh_init().
1703 eeh_set_pe_aux_size(max_diag_size
);
1704 ppc_md
.pcibios_bus_add_device
= pnv_pcibios_bus_add_device
;
1706 ret
= eeh_init(&pnv_eeh_ops
);
1708 pr_info("EEH: PowerNV platform initialized\n");
1710 pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret
);
1714 machine_arch_initcall(powernv
, eeh_powernv_init
);