/*
 * This file implements the platform dependent EEH operations on the
 * powernv platform.
 *
 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
#include <linux/atomic.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>

#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/opal.h>
#include <asm/ppc-pci.h>
#include <asm/pnv-pci.h>

#include "powernv.h"
#include "pci.h"
static int eeh_event_irq = -EINVAL;
void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);

	if (!pdev->is_virtfn)
		return;

	/*
	 * The following operations will fail if the VF's sysfs files
	 * aren't created or its resources aren't finalized.
	 */
	eeh_add_device_early(pdn);
	eeh_add_device_late(pdev);
	eeh_sysfs_add_device(pdev);
}
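
/*
 * Note: ppc_md.pcibios_bus_add_device (installed in pnv_eeh_init() below)
 * is invoked for every newly added pci_dev, but the hook above only acts
 * on SR-IOV VFs: non-VF devices return early at the is_virtfn check,
 * since VFs are the only devices whose EEH setup must be deferred until
 * their sysfs files and resources exist.
 */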
static int pnv_eeh_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;

	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
		pr_warn("%s: OPAL is required !\n",
			__func__);
		return -EINVAL;
	}

	/* Set probe mode */
	eeh_add_flag(EEH_PROBE_MODE_DEV);

	/*
	 * P7IOC blocks PCI config access to a frozen PE, but PHB3
	 * doesn't do that. So we have to selectively enable I/O
	 * prior to collecting the error log.
	 */
	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		if (phb->model == PNV_PHB_MODEL_P7IOC)
			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);

		if (phb->diag_data_size > max_diag_size)
			max_diag_size = phb->diag_data_size;

		/*
		 * PE#0 should be regarded as valid by the EEH core
		 * if it's not the reserved one. Currently, we
		 * have the reserved PE#255 and PE#127 for PHB3
		 * and P7IOC respectively. So we should regard
		 * PE#0 as valid for PHB3 and P7IOC.
		 */
		if (phb->ioda.reserved_pe_idx != 0)
			eeh_add_flag(EEH_VALID_PE_ZERO);

		break;
	}

	eeh_set_pe_aux_size(max_diag_size);
	ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;

	return 0;
}
static irqreturn_t pnv_eeh_event(int irq, void *data)
{
	/*
	 * We simply send a special EEH event if EEH has been
	 * enabled. We don't care about EEH events until we've
	 * finished processing the outstanding ones. Event processing
	 * gets unmasked in next_error() if EEH is enabled.
	 */
	disable_irq_nosync(irq);

	if (eeh_enabled())
		eeh_send_failure_event(NULL);

	return IRQ_HANDLED;
}
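
/*
 * The interrupt handshake above pairs with pnv_eeh_next_error() below:
 * the OPAL event IRQ stays disabled while queued EEH events are being
 * processed and is only re-enabled there (via enable_irq()) once no
 * error remains outstanding.
 */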
#ifdef CONFIG_DEBUG_FS
static ssize_t pnv_eeh_ei_write(struct file *filp,
				const char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct pci_controller *hose = filp->private_data;
	struct eeh_pe *pe;
	int pe_no, type, func;
	unsigned long addr, mask;
	char buf[50];
	int ret;

	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENXIO;

	/* Copy over argument buffer */
	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
	if (!ret)
		return -EFAULT;

	/* Retrieve parameters */
	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
		     &pe_no, &type, &func, &addr, &mask);
	if (ret != 5)
		return -EINVAL;

	/* Retrieve PE */
	pe = eeh_pe_get(hose, pe_no, 0);
	if (!pe)
		return -ENODEV;

	/* Do error injection */
	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
	return ret < 0 ? ret : count;
}
static const struct file_operations pnv_eeh_ei_fops = {
	.open	= simple_open,
	.llseek	= no_llseek,
	.write	= pnv_eeh_ei_write,
};
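
/*
 * Example (hypothetical values): inject an error into PE#2 of a PHB
 * from userspace using the "pe_no:type:func:addr:mask" format parsed
 * by pnv_eeh_ei_write() above, all fields in hex:
 *
 *   echo "2:0:20:0x0:0x0" > /sys/kernel/debug/powerpc/PCI0000/err_injct
 *
 * The exact debugfs path depends on where the PHB's dbgfs directory is
 * created; see the debugfs_create_file() calls in pnv_eeh_post_init().
 */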
static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	out_be64(phb->regs + offset, val);
	return 0;
}
static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
{
	struct pci_controller *hose = data;
	struct pnv_phb *phb = hose->private_data;

	*val = in_be64(phb->regs + offset);
	return 0;
}
#define PNV_EEH_DBGFS_ENTRY(name, reg)				\
static int pnv_eeh_dbgfs_set_##name(void *data, u64 val)	\
{								\
	return pnv_eeh_dbgfs_set(data, reg, val);		\
}								\
								\
static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val)	\
{								\
	return pnv_eeh_dbgfs_get(data, reg, val);		\
}								\
								\
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name,		\
			pnv_eeh_dbgfs_get_##name,		\
			pnv_eeh_dbgfs_set_##name,		\
			"0x%llx\n")

PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);

#endif /* CONFIG_DEBUG_FS */
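
/*
 * Each PNV_EEH_DBGFS_ENTRY() instantiation above expands, for example,
 * into pnv_eeh_dbgfs_{get,set}_outb() plus a pnv_eeh_dbgfs_ops_outb
 * attribute that reads/writes the 64-bit PHB register at offset 0xD10.
 * The three offsets (0xD10/0xD90/0xE10) back the err_injct_outbound,
 * err_injct_inboundA and err_injct_inboundB debugfs files created in
 * pnv_eeh_post_init() below.
 */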
/**
 * pnv_eeh_post_init - EEH platform dependent post initialization
 *
 * EEH platform dependent post initialization on powernv. When
 * this function is called, the EEH PEs and devices should have
 * been built. Once the I/O address cache has been built, EEH is
 * ready to provide service.
 */
int pnv_eeh_post_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	int ret = 0;

	/* Probe devices & build address cache */
	eeh_probe_devices();
	eeh_addr_cache_build();

	/* Register OPAL event notifier */
	eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
	if (eeh_event_irq < 0) {
		pr_err("%s: Can't register OPAL event interrupt (%d)\n",
		       __func__, eeh_event_irq);
		return eeh_event_irq;
	}

	ret = request_irq(eeh_event_irq, pnv_eeh_event,
			  IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
	if (ret < 0) {
		irq_dispose_mapping(eeh_event_irq);
		pr_err("%s: Can't request OPAL event interrupt (%d)\n",
		       __func__, eeh_event_irq);
		return ret;
	}

	if (!eeh_enabled())
		disable_irq(eeh_event_irq);

	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		/*
		 * If EEH is enabled, we're going to rely on that.
		 * Otherwise, we fall back to the conventional
		 * mechanism that clears frozen PEs during PCI
		 * config access.
		 */
		if (eeh_enabled())
			phb->flags |= PNV_PHB_FLAG_EEH;
		else
			phb->flags &= ~PNV_PHB_FLAG_EEH;

		/* Create debugfs entries */
#ifdef CONFIG_DEBUG_FS
		if (phb->has_dbgfs || !phb->dbgfs)
			continue;

		phb->has_dbgfs = 1;
		debugfs_create_file("err_injct", 0200,
				    phb->dbgfs, hose,
				    &pnv_eeh_ei_fops);

		debugfs_create_file("err_injct_outbound", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_dbgfs_ops_outb);
		debugfs_create_file("err_injct_inboundA", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_dbgfs_ops_inbA);
		debugfs_create_file("err_injct_inboundB", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_dbgfs_ops_inbB);
#endif /* CONFIG_DEBUG_FS */
	}

	return ret;
}
static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
{
	int pos = PCI_CAPABILITY_LIST;
	int cnt = 48;	/* Maximal number of capabilities */
	u32 status, id;

	if (!pdn)
		return 0;

	/* Check if the device supports capabilities */
	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
	if (!(status & PCI_STATUS_CAP_LIST))
		return 0;

	while (cnt--) {
		pnv_pci_cfg_read(pdn, pos, 1, &pos);
		if (pos < 0x40)
			break;

		pos &= ~3;
		pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
		if (id == 0xff)
			break;

		/* Found */
		if (id == cap)
			return pos;

		/* Next one */
		pos += PCI_CAP_LIST_NEXT;
	}

	return 0;
}
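
/*
 * For example, pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP) returns the config
 * space offset of the PCI Express capability, or 0 when the device has
 * no such capability; the cnt limit of 48 guards against malformed,
 * looping capability lists.
 */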
static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 header;
	int pos = 256, ttl = (4096 - 256) / 8;

	if (!edev || !edev->pcie_cap)
		return 0;
	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
		return 0;
	else if (!header)
		return 0;

	while (ttl-- > 0) {
		if (PCI_EXT_CAP_ID(header) == cap && pos)
			return pos;

		pos = PCI_EXT_CAP_NEXT(header);
		if (pos < 256)
			break;

		if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
			break;
	}

	return 0;
}
/**
 * pnv_eeh_probe - Do probe on PCI device
 * @pdn: PCI device node
 * @data: unused
 *
 * When the EEH module is installed during system boot, all PCI devices
 * are checked one by one to see if they support EEH. This function
 * is introduced for that purpose. By default, EEH is enabled
 * on all PCI devices, so we only need to do the necessary
 * initialization on the corresponding eeh device and create the PE
 * accordingly.
 *
 * Note that it is unsafe to retrieve the EEH device through
 * the corresponding PCI device: during PCI device hotplug, which
 * is possibly triggered by the EEH core, the binding between the EEH
 * device and the PCI device hasn't been built yet.
 */
static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
{
	struct pci_controller *hose = pdn->phb;
	struct pnv_phb *phb = hose->private_data;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	uint32_t pcie_flags;
	int ret;
	int config_addr = (pdn->busno << 8) | (pdn->devfn);

	/*
	 * When probing the root bridge, which doesn't have any
	 * subordinate PCI devices, we don't have an OF node for
	 * it either. So it's not reasonable to continue probing.
	 */
	if (!edev || edev->pe)
		return NULL;

	/* Skip for PCI-ISA bridge */
	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
		return NULL;

	/* Skip if we haven't probed yet */
	if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE)
		return NULL;

	/* Initialize eeh device */
	edev->class_code = pdn->class_code;
	edev->mode	&= 0xFFFFFF00;
	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
	edev->af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
		edev->mode |= EEH_DEV_BRIDGE;
		if (edev->pcie_cap) {
			pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
					 2, &pcie_flags);
			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
				edev->mode |= EEH_DEV_ROOT_PORT;
			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
				edev->mode |= EEH_DEV_DS_PORT;
		}
	}

	edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];

	/* Create PE */
	ret = eeh_add_to_parent_pe(edev);
	if (ret) {
		pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%x)\n",
			__func__, hose->global_number, pdn->busno,
			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
		return NULL;
	}

	/*
	 * If the PE contains any one of the following adapters, the
	 * PCI config space can't be accessed when dumping the EEH log.
	 * Otherwise, we would run into a fenced PHB caused by a shortage
	 * of outbound credits in the adapter. PCI config access
	 * should be blocked until PE reset. MMIO accesses are certainly
	 * dropped by hardware. In order to drop PCI config requests,
	 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
	 * is checked in the backend during PE state retrieval. If
	 * the PE becomes frozen for the first time and the flag has
	 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
	 * that PE to block its config space.
	 *
	 * Broadcom BCM5718 2-port NICs (14e4:1656)
	 * Broadcom Austin 4-port NICs (14e4:1657)
	 * Broadcom Shiner 4-port 1G NICs (14e4:168a)
	 * Broadcom Shiner 2-port 10G NICs (14e4:168e)
	 */
	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x1656) ||
	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x1657) ||
	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x168a) ||
	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
	     pdn->device_id == 0x168e))
		edev->pe->state |= EEH_PE_CFG_RESTRICTED;

	/*
	 * Cache the PE primary bus, which can't be fetched when
	 * full hotplug is in progress. In that case, all child
	 * PCI devices of the PE are expected to have been removed
	 * prior to PE reset.
	 */
	if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
		edev->pe->bus = pci_find_bus(hose->global_number,
					     pdn->busno);
		if (edev->pe->bus)
			edev->pe->state |= EEH_PE_PRI_BUS;
	}

	/*
	 * Enable EEH explicitly so that we will do the EEH check
	 * while accessing I/O stuff
	 */
	eeh_add_flag(EEH_ENABLED);

	/* Save memory bars */
	eeh_save_bars(edev);

	return NULL;
}
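
/*
 * Worked example (hypothetical device): for a device at 02:1f.3,
 * pdn->busno = 0x02 and pdn->devfn = (0x1f << 3) | 3 = 0xfb, so
 * config_addr = (0x02 << 8) | 0xfb = 0x02fb. That value indexes
 * phb->ioda.pe_rmap[] above to find the device's IODA PE number.
 */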
/**
 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
 * @pe: EEH PE
 * @option: operation to be issued
 *
 * The function is used to control the EEH functionality globally.
 * Currently, the following options are supported, per PAPR:
 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
 */
static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	bool freeze_pe = false;
	int opt;
	s64 rc;

	switch (option) {
	case EEH_OPT_DISABLE:
		return -EPERM;
	case EEH_OPT_ENABLE:
		return 0;
	case EEH_OPT_THAW_MMIO:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
		break;
	case EEH_OPT_THAW_DMA:
		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
		break;
	case EEH_OPT_FREEZE_PE:
		freeze_pe = true;
		opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
		break;
	default:
		pr_warn("%s: Invalid option %d\n", __func__, option);
		return -EINVAL;
	}

	/* Freeze master and slave PEs if PHB supports compound PEs */
	if (freeze_pe) {
		if (phb->freeze_pe) {
			phb->freeze_pe(phb, pe->addr);
			return 0;
		}

		rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
				__func__, rc, phb->hose->global_number,
				pe->addr);
			return -EIO;
		}

		return 0;
	}

	/* Unfreeze master and slave PEs if PHB supports it */
	if (phb->unfreeze_pe)
		return phb->unfreeze_pe(phb, pe->addr, opt);

	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
			__func__, rc, option, phb->hose->global_number,
			pe->addr);
		return -EIO;
	}

	return 0;
}
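
/*
 * Summary of the option mapping implemented above:
 *   EEH_OPT_DISABLE    -> rejected (-EPERM)
 *   EEH_OPT_ENABLE     -> no-op
 *   EEH_OPT_THAW_MMIO  -> OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO
 *   EEH_OPT_THAW_DMA   -> OPAL_EEH_ACTION_CLEAR_FREEZE_DMA
 *   EEH_OPT_FREEZE_PE  -> OPAL_EEH_ACTION_SET_FREEZE_ALL
 */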
/**
 * pnv_eeh_get_pe_addr - Retrieve PE address
 * @pe: EEH PE
 *
 * Retrieve the PE address according to the given traditional
 * PCI BDF (Bus/Device/Function) address.
 */
static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
{
	return pe->addr;
}
static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	s64 rc;

	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
					 phb->diag_data_size);
	if (rc != OPAL_SUCCESS)
		pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
			__func__, rc, pe->phb->global_number);
}
static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result = 0;

	rc = opal_pci_eeh_freeze_status(phb->opal_id,
					pe->addr,
					&fstate,
					&pcierr,
					NULL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld getting PHB#%x state\n",
			__func__, rc, phb->hose->global_number);
		return EEH_STATE_NOT_SUPPORT;
	}

	/*
	 * Check the PHB state. If the PHB is frozen for the
	 * first time, dump the PHB diag-data.
	 */
	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
	} else if (!(pe->state & EEH_PE_ISOLATED)) {
		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}
static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate;
	__be16 pcierr;
	s64 rc;
	int result;

	/*
	 * We don't clobber the hardware frozen state until PE
	 * reset is completed. In order to keep the EEH core
	 * moving forward, we have to return an operational
	 * state during PE reset.
	 */
	if (pe->state & EEH_PE_RESET) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		return result;
	}

	/*
	 * Fetch the PE state from hardware. If the PHB
	 * supports compound PEs, let it handle that.
	 */
	if (phb->get_pe_state) {
		fstate = phb->get_pe_state(phb, pe->addr);
	} else {
		rc = opal_pci_eeh_freeze_status(phb->opal_id,
						pe->addr,
						&fstate,
						&pcierr,
						NULL);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
				__func__, rc, phb->hose->global_number,
				pe->addr);
			return EEH_STATE_NOT_SUPPORT;
		}
	}

	/* Figure out state */
	switch (fstate) {
	case OPAL_EEH_STOPPED_NOT_FROZEN:
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_FREEZE:
		result = (EEH_STATE_DMA_ACTIVE |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_DMA_FREEZE:
		result = (EEH_STATE_MMIO_ACTIVE |
			  EEH_STATE_MMIO_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
		result = 0;
		break;
	case OPAL_EEH_STOPPED_RESET:
		result = EEH_STATE_RESET_ACTIVE;
		break;
	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
		result = EEH_STATE_UNAVAILABLE;
		break;
	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
		result = EEH_STATE_NOT_SUPPORT;
		break;
	default:
		result = EEH_STATE_NOT_SUPPORT;
		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
			__func__, phb->hose->global_number,
			pe->addr, fstate);
	}

	/*
	 * If the PHB supports compound PEs, freeze all
	 * slave PEs for consistency.
	 *
	 * If the PE is switching to the frozen state for the
	 * first time, dump the PHB diag-data.
	 */
	if (!(result & EEH_STATE_NOT_SUPPORT) &&
	    !(result & EEH_STATE_UNAVAILABLE) &&
	    !(result & EEH_STATE_MMIO_ACTIVE) &&
	    !(result & EEH_STATE_DMA_ACTIVE)  &&
	    !(pe->state & EEH_PE_ISOLATED)) {
		if (phb->freeze_pe)
			phb->freeze_pe(phb, pe->addr);

		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}
/**
 * pnv_eeh_get_state - Retrieve PE state
 * @pe: EEH PE
 * @delay: delay while PE state is temporarily unavailable
 *
 * Retrieve the state of the specified PE. On an IODA-compatible
 * platform, it should be retrieved from the IODA table. Therefore,
 * we prefer passing it down to the hardware implementation to
 * handle it.
 */
static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
{
	int ret;

	if (pe->type & EEH_PE_PHB)
		ret = pnv_eeh_get_phb_state(pe);
	else
		ret = pnv_eeh_get_pe_state(pe);

	if (!delay)
		return ret;

	/*
	 * If the PE state is temporarily unavailable,
	 * inform the EEH core to delay for the default
	 * period (1 second)
	 */
	*delay = 0;
	if (ret & EEH_STATE_UNAVAILABLE)
		*delay = 1000;

	return ret;
}
static s64 pnv_eeh_poll(unsigned long id)
{
	s64 rc = OPAL_HARDWARE;

	while (1) {
		rc = opal_pci_poll(id);
		if (rc <= 0)
			break;

		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * rc);
		else
			msleep(rc);
	}

	return rc;
}
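
/*
 * opal_pci_poll() follows the usual OPAL polling convention: a positive
 * return value is the number of milliseconds to wait before polling
 * again, while zero or a negative value means the operation completed
 * (or failed). Before the system is fully up we busy-wait with udelay()
 * above, since msleep() can't be used that early in boot.
 */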
int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/* Issue PHB complete reset request */
	if (option == EEH_RESET_FUNDAMENTAL ||
	    option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/*
	 * Poll the state of the PHB until the request is done
	 * successfully. The PHB reset is usually a PHB complete
	 * reset followed by a hot reset on the root bus. So we also
	 * need the PCI bus settlement delay.
	 */
	if (rc > 0)
		rc = pnv_eeh_poll(phb->opal_id);
	if (option == EEH_RESET_DEACTIVATE) {
		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
		else
			msleep(EEH_PE_RST_SETTLE_TIME);
	}
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}
static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/*
	 * During the reset deassert time, we needn't care about
	 * the reset scope because the firmware does nothing for
	 * fundamental or hot resets during the deassert phase.
	 */
	if (option == EEH_RESET_FUNDAMENTAL)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_FUNDAMENTAL,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/* Poll the state of the PHB until the request is done */
	if (rc > 0)
		rc = pnv_eeh_poll(phb->opal_id);
	if (option == EEH_RESET_DEACTIVATE)
		msleep(EEH_PE_RST_SETTLE_TIME);
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}
static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
{
	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	int aer = edev ? edev->aer_cap : 0;
	u32 ctrl;

	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
		 __func__, pci_domain_nr(dev->bus),
		 dev->bus->number, option);

	switch (option) {
	case EEH_RESET_FUNDAMENTAL:
	case EEH_RESET_HOT:
		/* Don't report the linkDown event */
		if (aer) {
			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl |= PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_SETTLE_TIME);

		/* Continue reporting the linkDown event */
		if (aer) {
			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl &= ~PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		break;
	}

	return 0;
}
static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct device_node *dn = pci_device_to_OF_node(pdev);
	uint64_t id = PCI_SLOT_ID(phb->opal_id,
				  (pdev->bus->number << 8) | pdev->devfn);
	uint8_t scope;
	int64_t rc;

	/* Hot reset to the bus if firmware cannot handle it */
	if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
		return __pnv_eeh_bridge_reset(pdev, option);

	switch (option) {
	case EEH_RESET_FUNDAMENTAL:
		scope = OPAL_RESET_PCI_FUNDAMENTAL;
		break;
	case EEH_RESET_HOT:
		scope = OPAL_RESET_PCI_HOT;
		break;
	case EEH_RESET_DEACTIVATE:
		return 0;
	default:
		dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n",
			__func__, option);
		return -EINVAL;
	}

	rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
	if (rc <= OPAL_SUCCESS)
		goto out;

	rc = pnv_eeh_poll(id);
out:
	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
}
void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
{
	struct pci_controller *hose;

	if (pci_is_root_bus(dev->bus)) {
		hose = pci_bus_to_host(dev->bus);
		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
	} else {
		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
	}
}
static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
				     int pos, u16 mask)
{
	int i, status = 0;

	/* Wait for the Transaction Pending bit to be cleared */
	for (i = 0; i < 4; i++) {
		eeh_ops->read_config(pdn, pos, 2, &status);
		if (!(status & mask))
			return;

		msleep((1 << i) * 100);
	}

	pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
		__func__, type,
		pdn->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
}
static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 reg = 0;

	if (WARN_ON(!edev->pcie_cap))
		return -ENOTTY;

	eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
	if (!(reg & PCI_EXP_DEVCAP_FLR))
		return -ENOTTY;

	switch (option) {
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		pnv_eeh_wait_for_pending(pdn, "",
					 edev->pcie_cap + PCI_EXP_DEVSTA,
					 PCI_EXP_DEVSTA_TRPND);
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     4, &reg);
		reg |= PCI_EXP_DEVCTL_BCR_FLR;
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      4, reg);
		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     4, &reg);
		reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      4, reg);
		msleep(EEH_PE_RST_SETTLE_TIME);
		break;
	}

	return 0;
}
static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 cap = 0;

	if (WARN_ON(!edev->af_cap))
		return -ENOTTY;

	eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap);
	if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
		return -ENOTTY;

	switch (option) {
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Wait for the Transaction Pending bit to clear. A
		 * word-aligned test is used, so we use the control
		 * offset rather than status and shift the test bit
		 * to match.
		 */
		pnv_eeh_wait_for_pending(pdn, "AF",
					 edev->af_cap + PCI_AF_CTRL,
					 PCI_AF_STATUS_TP << 8);
		eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL,
				      1, PCI_AF_CTRL_FLR);
		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0);
		msleep(EEH_PE_RST_SETTLE_TIME);
		break;
	}

	return 0;
}
static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
{
	struct eeh_dev *edev;
	struct pci_dn *pdn;
	int ret;

	/* The VF PE should have only one child device */
	edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return -ENXIO;

	ret = pnv_eeh_do_flr(pdn, option);
	if (!ret)
		return ret;

	return pnv_eeh_do_af_flr(pdn, option);
}
/**
 * pnv_eeh_reset - Reset the specified PE
 * @pe: EEH PE
 * @option: reset option
 *
 * Do a reset on the indicated PE. For a PCI bus sensitive PE,
 * we need to reset the parent p2p bridge. The PHB has to be
 * reinitialized if the p2p bridge is the root bridge. For a
 * PCI device sensitive PE, we will try to reset the device
 * through FLR. For now, we don't have OPAL APIs to do a HARD
 * reset yet, so all resets are SOFT (HOT) resets.
 */
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb;
	struct pci_bus *bus;
	int64_t rc;

	/*
	 * For a PHB reset, we always do a complete reset. For those PEs whose
	 * primary bus derives from the root complex (root bus) or the root
	 * port (usually bus#1), we apply a hot or fundamental reset on the
	 * root port. For other PEs, we always do a hot reset on the PE's
	 * primary bus.
	 *
	 * Here we have a different design from pHyp, which always clears the
	 * frozen state during PE reset. The idea here (from benh) is to keep
	 * the frozen state until PE reset is done completely (until BAR
	 * restore). With the frozen state, HW drops illegal I/O or MMIO
	 * accesses, which could otherwise incur a recursive frozen PE during
	 * PE reset. The side effect is that the EEH core has to clear the
	 * frozen state explicitly after BAR restore.
	 */
	if (pe->type & EEH_PE_PHB)
		return pnv_eeh_phb_reset(hose, option);

	/*
	 * The frozen PE might be caused by the PAPR error injection
	 * registers, which are expected to be cleared after hitting
	 * a frozen PE as stated in the hardware spec. Unfortunately,
	 * that's not true on P7IOC. So we have to clear them manually
	 * to avoid recursive EEH errors during recovery.
	 */
	phb = hose->private_data;
	if (phb->model == PNV_PHB_MODEL_P7IOC &&
	    (option == EEH_RESET_HOT ||
	     option == EEH_RESET_FUNDAMENTAL)) {
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_ERROR,
				    OPAL_ASSERT_RESET);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld clearing error injection registers\n",
				__func__, rc);
			return -EIO;
		}
	}

	if (pe->type & EEH_PE_VF)
		return pnv_eeh_reset_vf_pe(pe, option);

	bus = eeh_pe_bus_get(pe);
	if (!bus) {
		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
		       __func__, pe->phb->global_number, pe->addr);
		return -EIO;
	}

	/*
	 * If dealing with the root bus (or the bus underneath the
	 * root port), we reset the bus underneath the root port.
	 *
	 * The cxl driver depends on this behaviour for bi-modal card
	 * switching.
	 */
	if (pci_is_root_bus(bus) ||
	    pci_is_root_bus(bus->parent))
		return pnv_eeh_root_reset(hose, option);

	return pnv_eeh_bridge_reset(bus->self, option);
}
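
/*
 * Dispatch summary for pnv_eeh_reset() above:
 *   PHB PE                              -> pnv_eeh_phb_reset()
 *   VF PE                               -> pnv_eeh_reset_vf_pe() (FLR/AF FLR)
 *   PE on the root bus or under the
 *   root port                           -> pnv_eeh_root_reset()
 *   anything else                       -> pnv_eeh_bridge_reset() on bus->self
 */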
/**
 * pnv_eeh_wait_state - Wait for PE state
 * @pe: EEH PE
 * @max_wait: maximal period in milliseconds
 *
 * Wait for the state of the associated PE. It might take some time
 * to retrieve the PE's state.
 */
static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
{
	int ret;
	int mwait;

	while (1) {
		ret = pnv_eeh_get_state(pe, &mwait);

		/*
		 * If the PE's state is temporarily unavailable,
		 * we have to wait for the specified time. Otherwise,
		 * the PE's state is returned immediately.
		 */
		if (ret != EEH_STATE_UNAVAILABLE)
			return ret;

		if (max_wait <= 0) {
			pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
				__func__, pe->addr, max_wait);
			return EEH_STATE_NOT_SUPPORT;
		}

		max_wait -= mwait;
		msleep(mwait);
	}

	return EEH_STATE_NOT_SUPPORT;
}
/**
 * pnv_eeh_get_log - Retrieve error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 * @drv_log: driver log to be combined with retrieved error log
 * @len: length of driver log
 *
 * Retrieve the temporary or permanent error from the PE.
 */
static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
			   char *drv_log, unsigned long len)
{
	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);

	return 0;
}
/**
 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
 * @pe: EEH PE
 *
 * The function will be called to reconfigure the bridges included
 * in the specified PE so that the malfunctioning PE can be recovered
 * again.
 */
static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
{
	return 0;
}
/**
 * pnv_eeh_err_inject - Inject specified error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: specific error type
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified error, which is
 * determined by @type and @func, to the indicated PE for
 * testing purposes.
 */
static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
			      unsigned long addr, unsigned long mask)
{
	struct pci_controller *hose = pe->phb;
	struct pnv_phb *phb = hose->private_data;
	s64 rc;

	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
		pr_warn("%s: Invalid error type %d\n",
			__func__, type);
		return -ERANGE;
	}

	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
		pr_warn("%s: Invalid error function %d\n",
			__func__, func);
		return -ERANGE;
	}

	/* Does firmware support error injection ? */
	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
		pr_warn("%s: Firmware doesn't support error injection\n",
			__func__);
		return -ENXIO;
	}

	/* Do error injection */
	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
				 type, func, addr, mask);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld injecting error "
			"%d-%d to PHB#%x-PE#%x\n",
			__func__, rc, type, func,
			hose->global_number, pe->addr);
		return -EIO;
	}

	return 0;
}
static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev || !edev->pe)
		return false;

	/*
	 * We will issue FLR or AF FLR to all VFs, which are contained
	 * in the VF PE. It relies on the EEH PCI config accessors. So we
	 * can't block them during that window.
	 */
	if (edev->physfn && (edev->pe->state & EEH_PE_RESET))
		return false;

	if (edev->pe->state & EEH_PE_CFG_BLOCKED)
		return true;

	return false;
}
static int pnv_eeh_read_config(struct pci_dn *pdn,
			       int where, int size, u32 *val)
{
	if (!pdn)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (pnv_eeh_cfg_blocked(pdn)) {
		*val = 0xFFFFFFFF;
		return PCIBIOS_SET_FAILED;
	}

	return pnv_pci_cfg_read(pdn, where, size, val);
}
static int pnv_eeh_write_config(struct pci_dn *pdn,
				int where, int size, u32 val)
{
	if (!pdn)
		return PCIBIOS_DEVICE_NOT_FOUND;

	if (pnv_eeh_cfg_blocked(pdn))
		return PCIBIOS_SET_FAILED;

	return pnv_pci_cfg_write(pdn, where, size, val);
}
static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
{
	/* GEM */
	if (data->gemXfir || data->gemRfir ||
	    data->gemRirqfir || data->gemMask || data->gemRwof)
		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->gemXfir),
			be64_to_cpu(data->gemRfir),
			be64_to_cpu(data->gemRirqfir),
			be64_to_cpu(data->gemMask),
			be64_to_cpu(data->gemRwof));

	/* LEM */
	if (data->lemFir || data->lemErrMask ||
	    data->lemAction0 || data->lemAction1 || data->lemWof)
		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
			be64_to_cpu(data->lemFir),
			be64_to_cpu(data->lemErrMask),
			be64_to_cpu(data->lemAction0),
			be64_to_cpu(data->lemAction1),
			be64_to_cpu(data->lemWof));
}
static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
{
	struct pnv_phb *phb = hose->private_data;
	struct OpalIoP7IOCErrorData *data =
		(struct OpalIoP7IOCErrorData *)phb->diag_data;
	long rc;

	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
			__func__, phb->hub_id, rc);
		return;
	}

	switch (be16_to_cpu(data->type)) {
	case OPAL_P7IOC_DIAG_TYPE_RGC:
		pr_info("P7IOC diag-data for RGC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
			pr_info("  RGC: %016llx %016llx\n",
				be64_to_cpu(data->rgc.rgcStatus),
				be64_to_cpu(data->rgc.rgcLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_BI:
		pr_info("P7IOC diag-data for BI %s\n\n",
			data->bi.biDownbound ? "Downbound" : "Upbound");
		pnv_eeh_dump_hub_diag_common(data);
		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
		    data->bi.biLdcp2 || data->bi.biFenceStatus)
			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
				be64_to_cpu(data->bi.biLdcp0),
				be64_to_cpu(data->bi.biLdcp1),
				be64_to_cpu(data->bi.biLdcp2),
				be64_to_cpu(data->bi.biFenceStatus));
		break;
	case OPAL_P7IOC_DIAG_TYPE_CI:
		pr_info("P7IOC diag-data for CI Port %d\n\n",
			data->ci.ciPort);
		pnv_eeh_dump_hub_diag_common(data);
		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
			pr_info("  CI:  %016llx %016llx\n",
				be64_to_cpu(data->ci.ciPortStatus),
				be64_to_cpu(data->ci.ciPortLdcp));
		break;
	case OPAL_P7IOC_DIAG_TYPE_MISC:
		pr_info("P7IOC diag-data for MISC\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	case OPAL_P7IOC_DIAG_TYPE_I2C:
		pr_info("P7IOC diag-data for I2C\n\n");
		pnv_eeh_dump_hub_diag_common(data);
		break;
	default:
		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
			__func__, phb->hub_id, data->type);
	}
}
static int pnv_eeh_get_pe(struct pci_controller *hose,
			  u16 pe_no, struct eeh_pe **pe)
{
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pnv_pe;
	struct eeh_pe *dev_pe;

	/*
	 * If the PHB supports compound PEs, fetch the master PE,
	 * because slave PEs are invisible to the EEH core.
	 */
	pnv_pe = &phb->ioda.pe_array[pe_no];
	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
		pnv_pe = pnv_pe->master;
		WARN_ON(!pnv_pe ||
			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pnv_pe->pe_number;
	}

	/* Find the PE according to PE# */
	dev_pe = eeh_pe_get(hose, pe_no, 0);
	if (!dev_pe)
		return -EEXIST;

	/* Freeze the (compound) PE */
	*pe = dev_pe;
	if (!(dev_pe->state & EEH_PE_ISOLATED))
		phb->freeze_pe(phb, pe_no);

	/*
	 * At this point, we're sure the (compound) PE should
	 * have been frozen. However, we still need to poke upward
	 * until hitting the topmost frozen PE.
	 */
	dev_pe = dev_pe->parent;
	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
		int ret;
		int active_flags = (EEH_STATE_MMIO_ACTIVE |
				    EEH_STATE_DMA_ACTIVE);

		ret = eeh_ops->get_state(dev_pe, NULL);
		if (ret <= 0 || (ret & active_flags) == active_flags) {
			dev_pe = dev_pe->parent;
			continue;
		}

		/* Frozen parent PE */
		*pe = dev_pe;
		if (!(dev_pe->state & EEH_PE_ISOLATED))
			phb->freeze_pe(phb, dev_pe->addr);

		/* Next one */
		dev_pe = dev_pe->parent;
	}

	return 0;
}
/**
 * pnv_eeh_next_error - Retrieve next EEH error to handle
 * @pe: Affected PE
 *
 * The function is expected to be called by the EEH core when it gets
 * a special EEH event (without a bound PE). The function calls
 * OPAL APIs to retrieve the next error to handle. Informational errors
 * are handled internally by the platform. However, a dead IOC, dead PHB,
 * fenced PHB or frozen PE should eventually be handled by the EEH core.
 */
static int pnv_eeh_next_error(struct eeh_pe **pe)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct eeh_pe *phb_pe, *parent_pe;
	__be64 frozen_pe_no;
	__be16 err_type, severity;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	long rc;
	int state, ret = EEH_NEXT_ERR_NONE;

	/*
	 * While running here, it's safe to purge the event queue. The
	 * event should still be masked.
	 */
	eeh_remove_event(NULL, false);

	list_for_each_entry(hose, &hose_list, list_node) {
		/*
		 * If the subordinate PCI buses of the PHB have been
		 * removed or the PHB is already undergoing error
		 * recovery, we don't need to take care of it any more.
		 */
		phb = hose->private_data;
		phb_pe = eeh_phb_pe_get(hose);
		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
			continue;

		rc = opal_pci_next_error(phb->opal_id,
					 &frozen_pe_no, &err_type, &severity);
		if (rc != OPAL_SUCCESS) {
			pr_devel("%s: Invalid return value on "
				 "PHB#%x (0x%lx) from opal_pci_next_error",
				 __func__, hose->global_number, rc);
			continue;
		}

		/* If the PHB doesn't have an error, stop processing */
		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
			pr_devel("%s: No error found on PHB#%x\n",
				 __func__, hose->global_number);
			continue;
		}

		/*
		 * Process the error. We expect the error with the
		 * highest priority to be reported when there are
		 * multiple errors on the specific PHB.
		 */
		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
			 __func__, be16_to_cpu(err_type),
			 be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
			 hose->global_number);
		switch (be16_to_cpu(err_type)) {
		case OPAL_EEH_IOC_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
				pr_err("EEH: dead IOC detected\n");
				ret = EEH_NEXT_ERR_DEAD_IOC;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: IOC informative error "
					"detected\n");
				pnv_eeh_get_and_dump_hub_diag(hose);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PHB_ERROR:
			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
				*pe = phb_pe;
				pr_err("EEH: dead PHB#%x detected, "
				       "location: %s\n",
				       hose->global_number,
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_DEAD_PHB;
			} else if (be16_to_cpu(severity) ==
				   OPAL_EEH_SEV_PHB_FENCED) {
				*pe = phb_pe;
				pr_err("EEH: Fenced PHB#%x detected, "
				       "location: %s\n",
				       hose->global_number,
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FENCED_PHB;
			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				pr_info("EEH: PHB#%x informative error "
					"detected, location: %s\n",
					hose->global_number,
					eeh_pe_loc_get(phb_pe));
				pnv_eeh_get_phb_diag(phb_pe);
				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
				ret = EEH_NEXT_ERR_NONE;
			}

			break;
		case OPAL_EEH_PE_ERROR:
			/*
			 * If we can't find the corresponding PE, we
			 * just try to unfreeze.
			 */
			if (pnv_eeh_get_pe(hose,
					   be64_to_cpu(frozen_pe_no), pe)) {
				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
					hose->global_number, be64_to_cpu(frozen_pe_no));
				pr_info("EEH: PHB location: %s\n",
					eeh_pe_loc_get(phb_pe));

				/* Dump PHB diag-data */
				rc = opal_pci_get_phb_diag_data2(phb->opal_id,
					phb->diag_data, phb->diag_data_size);
				if (rc == OPAL_SUCCESS)
					pnv_pci_dump_phb_diag_data(hose,
							phb->diag_data);

				/* Try our best to clear it */
				opal_pci_eeh_freeze_clear(phb->opal_id,
					be64_to_cpu(frozen_pe_no),
					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
				ret = EEH_NEXT_ERR_NONE;
			} else if ((*pe)->state & EEH_PE_ISOLATED ||
				   eeh_pe_passed(*pe)) {
				ret = EEH_NEXT_ERR_NONE;
			} else {
				pr_err("EEH: Frozen PE#%x "
				       "on PHB#%x detected\n",
				       (*pe)->addr,
				       (*pe)->phb->global_number);
				pr_err("EEH: PE location: %s, "
				       "PHB location: %s\n",
				       eeh_pe_loc_get(*pe),
				       eeh_pe_loc_get(phb_pe));
				ret = EEH_NEXT_ERR_FROZEN_PE;
			}

			break;
		default:
			pr_warn("%s: Unexpected error type %d\n",
				__func__, be16_to_cpu(err_type));
		}

		/*
		 * The EEH core will try to recover from a fenced PHB or
		 * frozen PE. For a frozen PE, the EEH core enables its
		 * I/O path before collecting logs, which disturbs the
		 * error site. So we have to dump the log in advance here.
		 */
		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
		     ret == EEH_NEXT_ERR_FENCED_PHB) &&
		    !((*pe)->state & EEH_PE_ISOLATED)) {
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
			pnv_eeh_get_phb_diag(*pe);

			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				pnv_pci_dump_phb_diag_data((*pe)->phb,
							   (*pe)->data);
		}

		/*
		 * We probably have a frozen parent PE out there, and
		 * we need to handle the frozen parent PE first.
		 */
		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
			parent_pe = (*pe)->parent;
			while (parent_pe) {
				/* Hit the ceiling ? */
				if (parent_pe->type & EEH_PE_PHB)
					break;

				/* Frozen parent PE ? */
				state = eeh_ops->get_state(parent_pe, NULL);
				if (state > 0 &&
				    (state & active_flags) != active_flags)
					*pe = parent_pe;

				/* Next parent level */
				parent_pe = parent_pe->parent;
			}

			/* We possibly migrate to another PE */
			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
		}

		/*
		 * If we have no errors on the specific PHB or only
		 * an informative error there, we continue poking it.
		 * Otherwise, action must be taken by the upper layer.
		 */
		if (ret > EEH_NEXT_ERR_INF)
			break;
	}

	/* Unmask the event */
	if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())
		enable_irq(eeh_event_irq);

	return ret;
}
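
/*
 * The "ret > EEH_NEXT_ERR_INF" test above relies on the ordering of the
 * EEH_NEXT_ERR_* values: NONE and INF are the low values, so anything
 * greater (frozen PE, fenced/dead PHB, dead IOC) requires action from
 * the EEH core and stops the scan over the remaining PHBs.
 */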
static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	struct pnv_phb *phb;
	s64 ret = 0;
	int config_addr = (pdn->busno << 8) | (pdn->devfn);

	if (!edev)
		return -EEXIST;

	/*
	 * We have to restore the PCI config space after reset since the
	 * firmware can't see SRIOV VFs.
	 *
	 * FIXME: The MPS, error routing rules and timeout settings are
	 * worth exporting from firmware in an extensible way.
	 */
	if (edev->physfn) {
		ret = eeh_restore_vf_config(pdn);
	} else {
		phb = pdn->phb->private_data;
		ret = opal_pci_reinit(phb->opal_id,
				      OPAL_REINIT_PCI_DEV, config_addr);
	}

	if (ret) {
		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
			__func__, config_addr, ret);
		return -EIO;
	}

	return ret;
}
static struct eeh_ops pnv_eeh_ops = {
	.name			= "powernv",
	.init			= pnv_eeh_init,
	.probe			= pnv_eeh_probe,
	.set_option		= pnv_eeh_set_option,
	.get_pe_addr		= pnv_eeh_get_pe_addr,
	.get_state		= pnv_eeh_get_state,
	.reset			= pnv_eeh_reset,
	.wait_state		= pnv_eeh_wait_state,
	.get_log		= pnv_eeh_get_log,
	.configure_bridge	= pnv_eeh_configure_bridge,
	.err_inject		= pnv_eeh_err_inject,
	.read_config		= pnv_eeh_read_config,
	.write_config		= pnv_eeh_write_config,
	.next_error		= pnv_eeh_next_error,
	.restore_config		= pnv_eeh_restore_config,
	.notify_resume		= NULL
};
#ifdef CONFIG_PCI_IOV
static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	int parent_mps;

	if (!pdev->is_virtfn)
		return;

	/* Synchronize the MPS of the VF and PF */
	parent_mps = pcie_get_mps(pdev->physfn);
	if ((128 << pdev->pcie_mpss) >= parent_mps)
		pcie_set_mps(pdev, parent_mps);
	pdn->mps = pcie_get_mps(pdev);
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
#endif /* CONFIG_PCI_IOV */
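
/*
 * MPS arithmetic: pcie_mpss encodes the supported Max Payload Size as
 * 128 << pcie_mpss bytes (e.g. pcie_mpss = 2 -> 512 bytes). The fixup
 * above therefore copies the PF's MPS to the VF only when the VF can
 * actually support it.
 */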
/**
 * eeh_powernv_init - Register platform dependent EEH operations
 *
 * EEH initialization on the powernv platform. This function should be
 * called before any other EEH related functions.
 */
static int __init eeh_powernv_init(void)
{
	int ret = -EINVAL;

	ret = eeh_ops_register(&pnv_eeh_ops);
	if (!ret)
		pr_info("EEH: PowerNV platform initialized\n");
	else
		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);

	return ret;
}
machine_early_initcall(powernv, eeh_powernv_init);