2 * Support PCI/PCIe on PowerNV platforms
4 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/delay.h>
17 #include <linux/string.h>
18 #include <linux/init.h>
19 #include <linux/bootmem.h>
20 #include <linux/irq.h>
22 #include <linux/msi.h>
24 #include <asm/sections.h>
27 #include <asm/pci-bridge.h>
28 #include <asm/machdep.h>
29 #include <asm/ppc-pci.h>
31 #include <asm/iommu.h>
33 #include <asm/abs_addr.h>
/*
 * List node (see `link`) that records an alignment together with a pointer
 * to either a device or a bus; exactly which one is set tells the two
 * cases apart, per the field comments below.
 *
 * NOTE(review): lossy listing - the closing brace is not visible and the
 * embedded numbering skips a line between `link` and `align`; verify the
 * full field list against the upstream file.
 */
38 struct resource_wrap
{
39 struct list_head link
;
41 resource_size_t align
;
42 struct pci_dev
*dev
; /* Set if it's a device */
43 struct pci_bus
*bus
; /* Set if it's a bridge */
/*
 * Print one message for a PE and return printk()'s result.
 *
 * The prefix buffer `pfix` is filled either with the name of pe->pdev or
 * with "domain:bus " taken from pe->pbus; the output then carries the log
 * level, that prefix, the PE number and the caller's va_format payload.
 *
 * NOTE(review): the declaration of `pfix` and the branch that selects
 * between the two prefix forms are not visible in this listing -
 * presumably it tests pe->pdev; confirm against the upstream file.
 */
46 static int __pe_printk(const char *level
, const struct pnv_ioda_pe
*pe
,
47 struct va_format
*vaf
)
52 strlcpy(pfix
, dev_name(&pe
->pdev
->dev
), sizeof(pfix
));
54 sprintf(pfix
, "%04x:%02x ",
55 pci_domain_nr(pe
->pbus
), pe
->pbus
->number
);
56 return printk("pci %s%s: [PE# %.3d] %pV", level
, pfix
, pe
->pe_number
, vaf
);
/*
 * Generator for per-level PE logging helpers: each expansion defines
 * `func(pe, fmt, ...)`, which packages its varargs into a struct va_format
 * and forwards to __pe_printk() with the fixed `kern_level`.
 * Instantiated below as pe_err, pe_warn and pe_info.
 *
 * NOTE(review): several continuation lines of the macro body (braces,
 * va_list handling, return) are missing from this listing.
 */
59 #define define_pe_printk_level(func, kern_level) \
60 static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
62 struct va_format vaf; \
66 va_start(args, fmt); \
71 r = __pe_printk(kern_level, pe, &vaf); \
77 define_pe_printk_level(pe_err, KERN_ERR);
78 define_pe_printk_level(pe_warn
, KERN_WARNING
);
79 define_pe_printk_level(pe_info
, KERN_INFO
);
/*
 * pnv_ioda_calc_dev - lay out one device's resources of a given type.
 *   dev:   device whose resource[0..PCI_ROM_RESOURCE] entries are walked
 *   flags: type mask tested against each resource's flags
 *   size:  out parameter, reported in the exit trace
 *   align: out parameter, compared against the largest resource size
 *
 * Visible steps: a first pass tags resources with IORESOURCE_UNSET; a
 * second, repeated pass finds the largest still-unset resource, sets its
 * end to start + max_size - 1 and clears IORESOURCE_UNSET.
 *
 * NOTE(review): lossy listing - the bodies of several `if`s, the outer
 * loop and the updates of *size / start are not visible; verify against
 * the upstream file before relying on details.
 */
82 /* Calculate resource usage & alignment requirement of a single
83 * device. This will also assign all resources within the device
84 * for a given type starting at 0 for the biggest one and then
85 * assigning in decreasing order of size.
87 static void __devinit
pnv_ioda_calc_dev(struct pci_dev
*dev
, unsigned int flags
,
88 resource_size_t
*size
,
89 resource_size_t
*align
)
91 resource_size_t start
;
95 pr_devel(" -> CDR %s\n", pci_name(dev
));
99 /* Clear the resources out and mark them all unset */
100 for (i
= 0; i
<= PCI_ROM_RESOURCE
; i
++) {
101 r
= &dev
->resource
[i
];
102 if (!(r
->flags
& flags
))
108 r
->flags
|= IORESOURCE_UNSET
;
111 /* We currently keep all memory resources together, we
112 * will handle prefetch & 64-bit separately in the future
113 * but for now we stick everybody in M32
117 resource_size_t max_size
= 0;
120 /* Find next biggest resource */
121 for (i
= 0; i
<= PCI_ROM_RESOURCE
; i
++) {
122 r
= &dev
->resource
[i
];
123 if (!(r
->flags
& IORESOURCE_UNSET
) ||
126 if (resource_size(r
) > max_size
) {
127 max_size
= resource_size(r
);
133 r
= &dev
->resource
[max_no
];
134 if (max_size
> *align
)
139 r
->end
= r
->start
+ max_size
- 1;
140 r
->flags
&= ~IORESOURCE_UNSET
;
141 pr_devel(" -> R%d %016llx..%016llx\n",
142 max_no
, r
->start
, r
->end
);
144 pr_devel(" <- CDR %s size=%llx align=%llx\n",
145 pci_name(dev
), *size
, *align
);
/*
 * pnv_ioda_add_wrap - queue one device/bridge on an alignment-ordered list.
 *
 * A zeroed resource_wrap is obtained with kzalloc(GFP_KERNEL).  The list
 * is scanned and the new entry is linked in front of the first existing
 * entry whose align is smaller than `align`; if none is found it goes to
 * the tail.  That keeps larger alignments ahead of smaller ones.
 *
 * NOTE(review): lossy listing - two parameters, the allocation-failure
 * check, the field assignments and the early return after the in-loop
 * insert are not visible; verify against the upstream file.
 */
148 /* Allocate a resource "wrap" for a given device or bridge and
149 * insert it at the right position in the sorted list
151 static void __devinit
pnv_ioda_add_wrap(struct list_head
*list
,
154 resource_size_t size
,
155 resource_size_t align
)
157 struct resource_wrap
*w1
, *w
= kzalloc(sizeof(*w
), GFP_KERNEL
);
164 list_for_each_entry(w1
, list
, link
) {
165 if (w1
->align
< align
) {
166 list_add_tail(&w
->link
, &w1
->link
);
170 list_add_tail(&w
->link
, list
);
/*
 * pnv_ioda_offset_dev - shift a device's resources of one type.
 *
 * For every resource index 0..PCI_ROM_RESOURCE whose flags intersect
 * `flags`, `offset` is added to both start and end.  Entry and exit are
 * traced with pr_devel().
 *
 * NOTE(review): the `flags` parameter line and the local declarations are
 * missing from this listing.
 */
173 /* Offset device resources of a given type */
174 static void __devinit
pnv_ioda_offset_dev(struct pci_dev
*dev
,
176 resource_size_t offset
)
181 pr_devel(" -> ODR %s [%x] +%016llx\n", pci_name(dev
), flags
, offset
);
183 for (i
= 0; i
<= PCI_ROM_RESOURCE
; i
++) {
184 r
= &dev
->resource
[i
];
185 if (r
->flags
& flags
) {
186 dev
->resource
[i
].start
+= offset
;
187 dev
->resource
[i
].end
+= offset
;
191 pr_devel(" <- ODR %s [%x] +%016llx\n", pci_name(dev
), flags
, offset
);
/*
 * pnv_ioda_offset_bus - shift a bus, its devices and its child buses.
 *
 * Walks the bus's own resources that match `flags`, then calls
 * pnv_ioda_offset_dev() for each device on the bus and recurses into each
 * child bus with the same flags and offset.
 *
 * NOTE(review): the statements applied to each matching bus resource are
 * not visible in this listing (presumably start/end += offset - confirm
 * against the upstream file).
 */
194 /* Offset bus resources (& all children) of a given type */
195 static void __devinit
pnv_ioda_offset_bus(struct pci_bus
*bus
,
197 resource_size_t offset
)
201 struct pci_bus
*cbus
;
204 pr_devel(" -> OBR %s [%x] +%016llx\n",
205 bus
->self
? pci_name(bus
->self
) : "root", flags
, offset
);
207 pci_bus_for_each_resource(bus
, r
, i
) {
208 if (r
&& (r
->flags
& flags
)) {
213 list_for_each_entry(dev
, &bus
->devices
, bus_list
)
214 pnv_ioda_offset_dev(dev
, flags
, offset
);
215 list_for_each_entry(cbus
, &bus
->children
, node
)
216 pnv_ioda_offset_bus(cbus
, flags
, offset
);
218 pr_devel(" <- OBR %s [%x]\n",
219 bus
->self
? pci_name(bus
->self
) : "root", flags
);
/*
 * pnv_ioda_calc_bus - recursive per-bus sizing and placement.
 *   bus:   bus being processed (bus->self is NULL-checked for "root")
 *   flags: resource type; IORESOURCE_IO selects the IO segment size,
 *          otherwise the M32 segment size and a 0x100000 bridge alignment
 *   size, align: out parameters reported in the exit trace
 *
 * Visible flow:
 *   1. pick min_align / min_balign from the PHB's segment sizes;
 *   2. recurse into every child bus and queue it with pnv_ioda_add_wrap();
 *   3. size every device with pnv_ioda_calc_dev(), raise its alignment to
 *      at least min_align, and queue it too;
 *   4. drain the queue, aligning `start` to each entry's alignment and
 *      shifting the entry with pnv_ioda_offset_dev()/pnv_ioda_offset_bus();
 *   5. raise *align to at least max(min_align, min_balign) and store the
 *      resulting flags/end into the bridge resource.
 *
 * NOTE(review): lossy listing - many statements (else branches, size
 * accumulation, list cleanup, the bridge-resource index `bres`) are not
 * visible; verify against the upstream file.
 */
222 /* This is the guts of our IODA resource allocation. This is called
223 * recursively for each bus in the system. It calculates all the
224 * necessary size and requirements for children and assign them
225 * resources such that:
227 * - Each function fits in its own contiguous set of IO/M32
230 * - All segments behind a P2P bridge are contiguous and obey
231 * alignment constraints of those bridges
233 static void __devinit
pnv_ioda_calc_bus(struct pci_bus
*bus
, unsigned int flags
,
234 resource_size_t
*size
,
235 resource_size_t
*align
)
237 struct pci_controller
*hose
= pci_bus_to_host(bus
);
238 struct pnv_phb
*phb
= hose
->private_data
;
239 resource_size_t dev_size
, dev_align
, start
;
240 resource_size_t min_align
, min_balign
;
241 struct pci_dev
*cdev
;
242 struct pci_bus
*cbus
;
243 struct list_head head
;
244 struct resource_wrap
*w
;
249 pr_devel("-> CBR %s [%x]\n",
250 bus
->self
? pci_name(bus
->self
) : "root", flags
);
252 /* Calculate alignment requirements based on the type
253 * of resource we are working on
255 if (flags
& IORESOURCE_IO
) {
257 min_align
= phb
->ioda
.io_segsize
;
261 min_align
= phb
->ioda
.m32_segsize
;
262 min_balign
= 0x100000;
265 /* Gather all our children resources ordered by alignment */
266 INIT_LIST_HEAD(&head
);
269 list_for_each_entry(cbus
, &bus
->children
, node
) {
270 pnv_ioda_calc_bus(cbus
, flags
, &dev_size
, &dev_align
);
271 pnv_ioda_add_wrap(&head
, cbus
, NULL
, dev_size
, dev_align
);
275 list_for_each_entry(cdev
, &bus
->devices
, bus_list
) {
276 pnv_ioda_calc_dev(cdev
, flags
, &dev_size
, &dev_align
);
277 /* Align them to segment size */
278 if (dev_align
< min_align
)
279 dev_align
= min_align
;
280 pnv_ioda_add_wrap(&head
, NULL
, cdev
, dev_size
, dev_align
);
282 if (list_empty(&head
))
285 /* Now we can do two things: assign offsets to them within that
286 * level and get our total alignment & size requirements. The
287 * assignment algorithm is going to be uber-trivial for now, we
288 * can try to be smarter later at filling out holes.
291 /* No offset for downstream bridges */
294 /* Offset from the root */
295 if (flags
& IORESOURCE_IO
)
296 /* Don't hand out IO 0 */
297 start
= hose
->io_resource
.start
+ 0x1000;
299 start
= hose
->mem_resources
[0].start
;
301 while(!list_empty(&head
)) {
302 w
= list_first_entry(&head
, struct resource_wrap
, link
);
306 start
= ALIGN(start
, w
->align
);
308 pnv_ioda_offset_dev(w
->dev
,flags
,start
);
310 pnv_ioda_offset_bus(w
->bus
,flags
,start
);
312 if (w
->align
> *align
)
320 /* Align and setup bridge resources */
321 *align
= max_t(resource_size_t
, *align
,
322 max_t(resource_size_t
, min_align
, min_balign
));
324 max_t(resource_size_t
, min_align
, min_balign
));
326 /* Only setup P2P's, not the PHB itself */
328 struct resource
*res
= bus
->resource
[bres
];
330 if (WARN_ON(res
== NULL
))
334 * FIXME: We should probably export and call
335 * pci_bridge_check_ranges() to properly re-initialize
336 * the PCI portion of the flags here, and to detect
337 * what the bridge actually supports.
340 res
->flags
= (*size
) ? flags
: 0;
341 res
->end
= (*size
) ? (*size
- 1) : 0;
344 pr_devel("<- CBR %s [%x] *size=%016llx *align=%016llx\n",
345 bus
->self
? pci_name(bus
->self
) : "root", flags
,*size
,*align
);
/*
 * Return the pci_dn associated with a PCI device.
 *
 * The visible code fetches the device-tree node of `dev` with
 * pci_device_to_OF_node().
 *
 * NOTE(review): the NULL check on `np` and the conversion/return of the
 * pci_dn are not visible in this listing; callers in this file do test
 * the result for NULL.
 */
348 static struct pci_dn
*pnv_ioda_get_pdn(struct pci_dev
*dev
)
350 struct device_node
*np
;
352 np
= pci_device_to_OF_node(dev
);
/*
 * pnv_ioda_setup_pe_segments - map a device's IO and M32 ranges to its PE.
 *
 * The device's PE number comes from its pci_dn (0 when there is none).
 * The function computes the min start / max end over the device's IO
 * resources and, separately, its MEM resources.  For each non-empty range
 * it converts to bus addresses and walks segment by segment (io_segsize /
 * m32_segsize), recording the PE in io_segmap[] / m32_segmap[] and calling
 * opal_pci_map_pe_mmio_window().  A segment already owned by another PE,
 * or an OPAL failure, is reported with pr_err().
 *
 * NOTE(review): "®ion" in the two pcibios_resource_to_bus() calls below
 * looks like a mis-decoded "&region" (HTML entity damage); `region` is the
 * struct pci_bus_region declared in this function.  Restore it from the
 * upstream file - it is left untouched here.
 * NOTE(review): lossy listing - the computation of `pos`, loop increments,
 * some call arguments and the error-path exits are not visible.
 */
358 static void __devinit
pnv_ioda_setup_pe_segments(struct pci_dev
*dev
)
360 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
361 struct pnv_phb
*phb
= hose
->private_data
;
362 struct pci_dn
*pdn
= pnv_ioda_get_pdn(dev
);
365 struct resource io_res
;
366 struct resource m32_res
;
367 struct pci_bus_region region
;
370 /* Anything not referenced in the device-tree gets PE#0 */
371 pe
= pdn
? pdn
->pe_number
: 0;
373 /* Calculate the device min/max */
374 io_res
.start
= m32_res
.start
= (resource_size_t
)-1;
375 io_res
.end
= m32_res
.end
= 0;
376 io_res
.flags
= IORESOURCE_IO
;
377 m32_res
.flags
= IORESOURCE_MEM
;
379 for (i
= 0; i
<= PCI_ROM_RESOURCE
; i
++) {
380 struct resource
*r
= NULL
;
381 if (dev
->resource
[i
].flags
& IORESOURCE_IO
)
383 if (dev
->resource
[i
].flags
& IORESOURCE_MEM
)
387 if (dev
->resource
[i
].start
< r
->start
)
388 r
->start
= dev
->resource
[i
].start
;
389 if (dev
->resource
[i
].end
> r
->end
)
390 r
->end
= dev
->resource
[i
].end
;
393 /* Setup IO segments */
394 if (io_res
.start
< io_res
.end
) {
395 pcibios_resource_to_bus(dev
, ®ion
, &io_res
);
397 i
= pos
/ phb
->ioda
.io_segsize
;
398 while(i
< phb
->ioda
.total_pe
&& pos
<= region
.end
) {
399 if (phb
->ioda
.io_segmap
[i
]) {
400 pr_err("%s: Trying to use IO seg #%d which is"
401 " already used by PE# %d\n",
403 phb
->ioda
.io_segmap
[i
]);
404 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
407 phb
->ioda
.io_segmap
[i
] = pe
;
408 rc
= opal_pci_map_pe_mmio_window(phb
->opal_id
, pe
,
411 if (rc
!= OPAL_SUCCESS
) {
412 pr_err("%s: OPAL error %d setting up mapping"
414 pci_name(dev
), rc
, i
);
415 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
418 pos
+= phb
->ioda
.io_segsize
;
423 /* Setup M32 segments */
424 if (m32_res
.start
< m32_res
.end
) {
425 pcibios_resource_to_bus(dev
, ®ion
, &m32_res
);
427 i
= pos
/ phb
->ioda
.m32_segsize
;
428 while(i
< phb
->ioda
.total_pe
&& pos
<= region
.end
) {
429 if (phb
->ioda
.m32_segmap
[i
]) {
430 pr_err("%s: Trying to use M32 seg #%d which is"
431 " already used by PE# %d\n",
433 phb
->ioda
.m32_segmap
[i
]);
434 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
437 phb
->ioda
.m32_segmap
[i
] = pe
;
438 rc
= opal_pci_map_pe_mmio_window(phb
->opal_id
, pe
,
439 OPAL_M32_WINDOW_TYPE
,
441 if (rc
!= OPAL_SUCCESS
) {
442 pr_err("%s: OPAL error %d setting up mapping"
443 " for M32 seg# %d\n",
444 pci_name(dev
), rc
, i
);
445 /* XXX DO SOMETHING TO DISABLE DEVICE ? */
448 pos
+= phb
->ioda
.m32_segsize
;
/*
 * pnv_ioda_resource_fit - bounds check of one resource against the PHB.
 *
 * The bounds are hose->io_resource for IORESOURCE_IO resources and
 * hose->mem_resources[0] for IORESOURCE_MEM resources; the resource fits
 * when its start and end both lie inside those bounds.
 *
 * NOTE(review): lossy listing - the second parameter line (the resource
 * `r`), the fallback for other resource types, the return statements and
 * whatever is done to a resource that does not fit are not visible.
 */
454 /* Check if a resource still fits in the total IO or M32 range
457 static int __devinit
pnv_ioda_resource_fit(struct pci_controller
*hose
,
460 struct resource
*bounds
;
462 if (r
->flags
& IORESOURCE_IO
)
463 bounds
= &hose
->io_resource
;
464 else if (r
->flags
& IORESOURCE_MEM
)
465 bounds
= &hose
->mem_resources
[0];
469 if (r
->start
>= bounds
->start
&& r
->end
<= bounds
->end
)
/*
 * pnv_ioda_update_resources - push computed resources to hardware,
 * recursively for `bus` and everything below it.
 *
 * Visible flow:
 *   - for a non-root bus, check the first two bus resources with
 *     pnv_ioda_resource_fit() and log those that do not fit;
 *   - program the bridge with pci_setup_bridge();
 *   - for each device: check every resource, assign PE segments with
 *     pnv_ioda_setup_pe_segments(), then write each resource back with
 *     pci_update_resource();
 *   - recurse into each child bus.
 *
 * NOTE(review): lossy listing - the guard around pci_setup_bridge() and
 * part of one pr_err() call are not visible.
 */
475 static void __devinit
pnv_ioda_update_resources(struct pci_bus
*bus
)
477 struct pci_controller
*hose
= pci_bus_to_host(bus
);
478 struct pci_bus
*cbus
;
479 struct pci_dev
*cdev
;
482 /* We used to clear all device enables here. However it looks like
483 * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers,
484 * and shoot fatal errors to the PHB which in turns fences itself
485 * and we can't recover from that ... yet. So for now, let's leave
486 * the enables as-is and hope for the best.
489 /* Check if bus resources fit in our IO or M32 range */
490 for (i
= 0; bus
->self
&& (i
< 2); i
++) {
491 struct resource
*r
= bus
->resource
[i
];
492 if (r
&& !pnv_ioda_resource_fit(hose
, r
))
493 pr_err("%s: Bus %d resource %d disabled, no room\n",
494 pci_name(bus
->self
), bus
->number
, i
);
497 /* Update self if it's not a PHB */
499 pci_setup_bridge(bus
);
501 /* Update child devices */
502 list_for_each_entry(cdev
, &bus
->devices
, bus_list
) {
503 /* Check if resource fits, if not, disable it */
504 for (i
= 0; i
<= PCI_ROM_RESOURCE
; i
++) {
505 struct resource
*r
= &cdev
->resource
[i
];
506 if (!pnv_ioda_resource_fit(hose
, r
))
507 pr_err("%s: Resource %d disabled, no room\n",
511 /* Assign segments */
512 pnv_ioda_setup_pe_segments(cdev
);
515 for (i
= 0; i
<= PCI_ROM_RESOURCE
; i
++)
516 pci_update_resource(cdev
, i
);
519 /* Update child busses */
520 list_for_each_entry(cbus
, &bus
->children
, node
)
521 pnv_ioda_update_resources(cbus
);
/*
 * pnv_ioda_alloc_pe - reserve a free PE number on this PHB.
 *
 * Searches phb->ioda.pe_alloc for a clear bit below total_pe and claims
 * it with test_and_set_bit(), retrying while that reports the bit was
 * already set.  The chosen number is stored in pe_array[pe].pe_number.
 *
 * Returns IODA_INVALID_PE when no bit below total_pe is clear.
 *
 * NOTE(review): the `do {` opener, the declaration of `pe` and the
 * success return are missing from this listing.
 */
524 static int __devinit
pnv_ioda_alloc_pe(struct pnv_phb
*phb
)
529 pe
= find_next_zero_bit(phb
->ioda
.pe_alloc
,
530 phb
->ioda
.total_pe
, 0);
531 if (pe
>= phb
->ioda
.total_pe
)
532 return IODA_INVALID_PE
;
533 } while(test_and_set_bit(pe
, phb
->ioda
.pe_alloc
));
535 phb
->ioda
.pe_array
[pe
].pe_number
= pe
;
/*
 * pnv_ioda_free_pe - release a PE number.
 *
 * Warns if the PE still has a pdev attached, zeroes its pe_array slot and
 * then clears its bit in the pe_alloc bitmap.
 *
 * NOTE(review): the function's braces are missing from this listing.
 */
539 static void __devinit
pnv_ioda_free_pe(struct pnv_phb
*phb
, int pe
)
541 WARN_ON(phb
->ioda
.pe_array
[pe
].pdev
);
543 memset(&phb
->ioda
.pe_array
[pe
], 0, sizeof(struct pnv_ioda_pe
));
544 clear_bit(pe
, phb
->ioda
.pe_alloc
);
/*
 * __pnv_ioda_get_one_pe - PE directly assigned to a device, if any.
 *
 * Looks up the device's pci_dn and returns the address of
 * phb->ioda.pe_array[pdn->pe_number].  The visible check compares
 * pdn->pe_number with IODA_INVALID_PE.
 *
 * Only built when CONFIG_PCI_MSI is set (see the #ifdef below).
 *
 * NOTE(review): lossy listing - the NULL-pdn handling and the value
 * returned for IODA_INVALID_PE are not visible (the caller tests the
 * result for NULL, so presumably NULL - confirm upstream).
 */
547 /* Currently those 2 are only used when MSIs are enabled, this will change
548 * but in the meantime, we need to protect them to avoid warnings
550 #ifdef CONFIG_PCI_MSI
551 static struct pnv_ioda_pe
* __devinit
__pnv_ioda_get_one_pe(struct pci_dev
*dev
)
553 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
554 struct pnv_phb
*phb
= hose
->private_data
;
555 struct pci_dn
*pdn
= pnv_ioda_get_pdn(dev
);
559 if (pdn
->pe_number
== IODA_INVALID_PE
)
561 return &phb
->ioda
.pe_array
[pdn
->pe_number
];
/*
 * pnv_ioda_get_pe - PE for a device, searching upward through bridges.
 *
 * Starts with __pnv_ioda_get_one_pe(dev); while that yields nothing and
 * the device's bus has an upstream bridge (dev->bus->self), it moves to
 * that bridge and tries again.
 *
 * NOTE(review): the final return statement is missing from this listing.
 * The #endif below closes the CONFIG_PCI_MSI region.
 */
564 static struct pnv_ioda_pe
* __devinit
pnv_ioda_get_pe(struct pci_dev
*dev
)
566 struct pnv_ioda_pe
*pe
= __pnv_ioda_get_one_pe(dev
);
568 while (!pe
&& dev
->bus
->self
) {
569 dev
= dev
->bus
->self
;
570 pe
= __pnv_ioda_get_one_pe(dev
);
576 #endif /* CONFIG_PCI_MSI */
/*
 * pnv_ioda_configure_pe - program one PE into the PHB via OPAL.
 *
 * Two shapes of PE are handled:
 *   - bus PE: device and function numbers are ignored in the RID match,
 *     the bus compare mode is chosen from the number of buses covered
 *     (1, 2, 4, 8, 16, 32; anything else is logged and falls back to an
 *     exact match), and the RID range spans count << 8 entries;
 *   - device PE: bus, device and function are all compared and the RID
 *     range is a single entry.
 *
 * It then calls opal_pci_set_pe() to map the PE, clears any freeze state,
 * adds the PE to the PELT-V of each parent that has a valid PE, fills the
 * pe_rmap[] reverse map for the RID range, and on IODA1 sets up and
 * enables an MVE whose number equals the PE number.
 *
 * NOTE(review): lossy listing - the if/else selecting the PE shape, the
 * switch header, all error checks' conditions and returns, the parent
 * loop header and the IODA2 branch body are not visible.
 */
578 static int __devinit
pnv_ioda_configure_pe(struct pnv_phb
*phb
,
579 struct pnv_ioda_pe
*pe
)
581 struct pci_dev
*parent
;
582 uint8_t bcomp
, dcomp
, fcomp
;
583 long rc
, rid_end
, rid
;
585 /* Bus validation ? */
589 dcomp
= OPAL_IGNORE_RID_DEVICE_NUMBER
;
590 fcomp
= OPAL_IGNORE_RID_FUNCTION_NUMBER
;
591 parent
= pe
->pbus
->self
;
592 count
= pe
->pbus
->busn_res
.end
- pe
->pbus
->busn_res
.start
+ 1;
594 case 1: bcomp
= OpalPciBusAll
; break;
595 case 2: bcomp
= OpalPciBus7Bits
; break;
596 case 4: bcomp
= OpalPciBus6Bits
; break;
597 case 8: bcomp
= OpalPciBus5Bits
; break;
598 case 16: bcomp
= OpalPciBus4Bits
; break;
599 case 32: bcomp
= OpalPciBus3Bits
; break;
601 pr_err("%s: Number of subordinate busses %d"
603 pci_name(pe
->pbus
->self
), count
);
604 /* Do an exact match only */
605 bcomp
= OpalPciBusAll
;
607 rid_end
= pe
->rid
+ (count
<< 8);
609 parent
= pe
->pdev
->bus
->self
;
610 bcomp
= OpalPciBusAll
;
611 dcomp
= OPAL_COMPARE_RID_DEVICE_NUMBER
;
612 fcomp
= OPAL_COMPARE_RID_FUNCTION_NUMBER
;
613 rid_end
= pe
->rid
+ 1;
616 /* Associate PE in PELT */
617 rc
= opal_pci_set_pe(phb
->opal_id
, pe
->pe_number
, pe
->rid
,
618 bcomp
, dcomp
, fcomp
, OPAL_MAP_PE
);
620 pe_err(pe
, "OPAL error %ld trying to setup PELT table\n", rc
);
623 opal_pci_eeh_freeze_clear(phb
->opal_id
, pe
->pe_number
,
624 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL
);
626 /* Add to all parents PELT-V */
628 struct pci_dn
*pdn
= pnv_ioda_get_pdn(parent
);
629 if (pdn
&& pdn
->pe_number
!= IODA_INVALID_PE
) {
630 rc
= opal_pci_set_peltv(phb
->opal_id
, pdn
->pe_number
,
631 pe
->pe_number
, OPAL_ADD_PE_TO_DOMAIN
);
632 /* XXX What to do in case of error ? */
634 parent
= parent
->bus
->self
;
636 /* Setup reverse map */
637 for (rid
= pe
->rid
; rid
< rid_end
; rid
++)
638 phb
->ioda
.pe_rmap
[rid
] = pe
->pe_number
;
640 /* Setup one MVTs on IODA1 */
641 if (phb
->type
== PNV_PHB_IODA1
) {
642 pe
->mve_number
= pe
->pe_number
;
643 rc
= opal_pci_set_mve(phb
->opal_id
, pe
->mve_number
,
646 pe_err(pe
, "OPAL error %ld setting up MVE %d\n",
650 rc
= opal_pci_set_mve_enable(phb
->opal_id
,
651 pe
->mve_number
, OPAL_ENABLE_MVE
);
653 pe_err(pe
, "OPAL error %ld enabling MVE %d\n",
658 } else if (phb
->type
== PNV_PHB_IODA2
)
/*
 * pnv_ioda_link_pe_by_weight - insert a PE into the PHB's PE list.
 *
 * The PE is linked in front of the first list entry whose dma_weight is
 * smaller than its own; if there is none it is appended at the tail, so
 * heavier PEs come first.
 *
 * NOTE(review): the early return after the in-loop insert is missing
 * from this listing.
 */
664 static void __devinit
pnv_ioda_link_pe_by_weight(struct pnv_phb
*phb
,
665 struct pnv_ioda_pe
*pe
)
667 struct pnv_ioda_pe
*lpe
;
669 list_for_each_entry(lpe
, &phb
->ioda
.pe_list
, link
) {
670 if (lpe
->dma_weight
< pe
->dma_weight
) {
671 list_add_tail(&pe
->link
, &lpe
->link
);
675 list_add_tail(&pe
->link
, &phb
->ioda
.pe_list
);
/*
 * pnv_ioda_dma_weight - heuristic DMA weight of a device.
 *
 * Three cases are distinguished by the visible tests: devices whose
 * header type is not PCI_HEADER_TYPE_NORMAL (bridges), UHCI/OHCI/EHCI USB
 * controllers, and RAID storage controllers.  Per the in-code comment the
 * base weight is 10 and 0 means no DMA is accounted.
 *
 * NOTE(review): every return statement is missing from this listing, so
 * the actual weights per case must be taken from the upstream file.
 */
678 static unsigned int pnv_ioda_dma_weight(struct pci_dev
*dev
)
680 /* This is quite simplistic. The "base" weight of a device
681 * is 10. 0 means no DMA is to be accounted for it.
684 /* If it's a bridge, no DMA */
685 if (dev
->hdr_type
!= PCI_HEADER_TYPE_NORMAL
)
688 /* Reduce the weight of slow USB controllers */
689 if (dev
->class == PCI_CLASS_SERIAL_USB_UHCI
||
690 dev
->class == PCI_CLASS_SERIAL_USB_OHCI
||
691 dev
->class == PCI_CLASS_SERIAL_USB_EHCI
)
694 /* Increase the weight of RAID (includes Obsidian) */
695 if ((dev
->class >> 8) == PCI_CLASS_STORAGE_RAID
)
/*
 * pnv_ioda_setup_dev_PE - create and configure a PE for a single device.
 *
 * Visible flow: allocate a PE number with pnv_ioda_alloc_pe() (warning
 * when none is left), record it in the device's pci_dn, set the PE's RID
 * to bus number << 8 | devfn, and configure it with
 * pnv_ioda_configure_pe().  If configuration fails the PE is freed and
 * the pci_dn is reset to IODA_INVALID_PE.  On success the device's DMA
 * weight is stored, non-zero weights are added to the PHB totals, and the
 * PE is linked into the weight-ordered list.
 *
 * NOTE(review): lossy listing - the outcome of the early checks (missing
 * pci_dn, PE already assigned, bus number 0), the remaining PE field
 * assignments and all return statements are not visible.
 */
702 static struct pnv_ioda_pe
* __devinit
pnv_ioda_setup_dev_PE(struct pci_dev
*dev
)
704 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
705 struct pnv_phb
*phb
= hose
->private_data
;
706 struct pci_dn
*pdn
= pnv_ioda_get_pdn(dev
);
707 struct pnv_ioda_pe
*pe
;
711 pr_err("%s: Device tree node not associated properly\n",
715 if (pdn
->pe_number
!= IODA_INVALID_PE
)
718 /* PE#0 has been pre-set */
719 if (dev
->bus
->number
== 0)
722 pe_num
= pnv_ioda_alloc_pe(phb
);
723 if (pe_num
== IODA_INVALID_PE
) {
724 pr_warning("%s: Not enough PE# available, disabling device\n",
729 /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
730 * pointer in the PE data structure, both should be destroyed at the
731 * same time. However, this needs to be looked at more closely again
732 * once we actually start removing things (Hotplug, SR-IOV, ...)
734 * At some point we want to remove the PDN completely anyways
736 pe
= &phb
->ioda
.pe_array
[pe_num
];
739 pdn
->pe_number
= pe_num
;
744 pe
->rid
= dev
->bus
->number
<< 8 | pdn
->devfn
;
746 pe_info(pe
, "Associated device to PE\n");
748 if (pnv_ioda_configure_pe(phb
, pe
)) {
749 /* XXX What do we do here ? */
751 pnv_ioda_free_pe(phb
, pe_num
);
752 pdn
->pe_number
= IODA_INVALID_PE
;
758 /* Assign a DMA weight to the device */
759 pe
->dma_weight
= pnv_ioda_dma_weight(dev
);
760 if (pe
->dma_weight
!= 0) {
761 phb
->ioda
.dma_weight
+= pe
->dma_weight
;
762 phb
->ioda
.dma_pe_count
++;
766 pnv_ioda_link_pe_by_weight(phb
, pe
);
/*
 * pnv_ioda_setup_same_PE - put every device below a bus into one PE.
 *
 * For each device on `bus`, the pci_dn's pe_number is set to the given
 * PE's number and the device's DMA weight is added to the PE's weight;
 * the function then recurses into the device's subordinate bus, if any.
 *
 * NOTE(review): the handling of a device without a pci_dn (only its
 * warning is visible) is missing from this listing.
 */
771 static void pnv_ioda_setup_same_PE(struct pci_bus
*bus
, struct pnv_ioda_pe
*pe
)
775 list_for_each_entry(dev
, &bus
->devices
, bus_list
) {
776 struct pci_dn
*pdn
= pnv_ioda_get_pdn(dev
);
779 pr_warn("%s: No device node associated with device !\n",
785 pdn
->pe_number
= pe
->pe_number
;
786 pe
->dma_weight
+= pnv_ioda_dma_weight(dev
);
787 if (dev
->subordinate
)
788 pnv_ioda_setup_same_PE(dev
->subordinate
, pe
);
/*
 * pnv_ioda_setup_bus_PE - create one PE covering a bridge's secondary
 * bus range.
 *
 * Visible flow: allocate a PE number (warning when none is left), set the
 * PE's RID to the first secondary bus number << 8, configure it with
 * pnv_ioda_configure_pe() (freeing it on failure), assign all devices
 * below the bus to it with pnv_ioda_setup_same_PE(), add a non-zero
 * accumulated DMA weight to the PHB totals, and link the PE into the
 * weight-ordered list.
 *
 * NOTE(review): lossy listing - the missing-subordinate check, remaining
 * PE field assignments and the early returns are not visible; the use of
 * the `ppe` parameter is not visible either.
 */
792 static void __devinit
pnv_ioda_setup_bus_PE(struct pci_dev
*dev
,
793 struct pnv_ioda_pe
*ppe
)
795 struct pci_controller
*hose
= pci_bus_to_host(dev
->bus
);
796 struct pnv_phb
*phb
= hose
->private_data
;
797 struct pci_bus
*bus
= dev
->subordinate
;
798 struct pnv_ioda_pe
*pe
;
802 pr_warning("%s: Bridge without a subordinate bus !\n",
806 pe_num
= pnv_ioda_alloc_pe(phb
);
807 if (pe_num
== IODA_INVALID_PE
) {
808 pr_warning("%s: Not enough PE# available, disabling bus\n",
813 pe
= &phb
->ioda
.pe_array
[pe_num
];
819 pe
->rid
= bus
->busn_res
.start
<< 8;
822 pe_info(pe
, "Secondary busses %pR associated with PE\n",
825 if (pnv_ioda_configure_pe(phb
, pe
)) {
826 /* XXX What do we do here ? */
828 pnv_ioda_free_pe(phb
, pe_num
);
833 /* Associate it with all child devices */
834 pnv_ioda_setup_same_PE(bus
, pe
);
836 /* Account for one DMA PE if at least one DMA capable device exist
839 if (pe
->dma_weight
!= 0) {
840 phb
->ioda
.dma_weight
+= pe
->dma_weight
;
841 phb
->ioda
.dma_pe_count
++;
845 pnv_ioda_link_pe_by_weight(phb
, pe
);
/*
 * pnv_ioda_setup_PEs - assign PEs to everything below a bus.
 *
 * Each device gets its own PE through pnv_ioda_setup_dev_PE().  A
 * PCIe-to-PCI bridge gets a single bus PE for everything behind it;
 * any other device with a subordinate bus is descended into recursively.
 *
 * NOTE(review): lines between the PE setup and the bridge test (likely a
 * check on the returned `pe`) are missing from this listing.
 */
848 static void __devinit
pnv_ioda_setup_PEs(struct pci_bus
*bus
)
851 struct pnv_ioda_pe
*pe
;
853 list_for_each_entry(dev
, &bus
->devices
, bus_list
) {
854 pe
= pnv_ioda_setup_dev_PE(dev
);
857 /* Leaving the PCIe domain ... single PE# */
858 if (dev
->pcie_type
== PCI_EXP_TYPE_PCI_BRIDGE
)
859 pnv_ioda_setup_bus_PE(dev
, pe
);
860 else if (dev
->subordinate
)
861 pnv_ioda_setup_PEs(dev
->subordinate
);
/*
 * pnv_pci_ioda_dma_dev_setup - per-device DMA setup hook for the PHB.
 *
 * Per the in-code comment, DMA setup is deferred until all PE numbers
 * have been assigned; no other statement is visible here.
 *
 * NOTE(review): the second parameter line and the braces are missing
 * from this listing.
 */
865 static void __devinit
pnv_pci_ioda_dma_dev_setup(struct pnv_phb
*phb
,
868 /* We delay DMA setup after we have assigned all PE# */
/*
 * pnv_ioda_setup_bus_dma - attach a PE's 32-bit TCE table to every
 * device on a bus and, recursively, on all subordinate buses.
 *
 * NOTE(review): the `bus` parameter line and local declarations are
 * missing from this listing.
 */
871 static void __devinit
pnv_ioda_setup_bus_dma(struct pnv_ioda_pe
*pe
,
876 list_for_each_entry(dev
, &bus
->devices
, bus_list
) {
877 set_iommu_table_base(&dev
->dev
, &pe
->tce32_table
);
878 if (dev
->subordinate
)
879 pnv_ioda_setup_bus_dma(pe
, dev
->subordinate
);
/*
 * pnv_pci_ioda_setup_dma_pe - give a PE its 32-bit DMA (TCE) window.
 *
 * `base` is the first 256M segment and `segs` the number of segments
 * (both appear shifted by 28 in the trace message).  Visible flow:
 *   - refuse (WARN_ON) if the PE already has a 32-bit segment;
 *   - allocate one contiguous, zeroed table of TCE32_TABLE_SIZE * segs
 *     bytes on the PHB's node;
 *   - map each segment with opal_pci_map_pe_dma_window();
 *   - initialise the Linux iommu_table, optionally wiring up software
 *     TCE invalidation from the "ibm,opal-tce-kill" property;
 *   - attach the table to the PE's device, or to every device under the
 *     PE's bus;
 *   - on failure, the tail of the function handles tce32_seg and frees
 *     the pages.
 *
 * NOTE(review): lossy listing - the base/segs parameter line, error
 * condition tests, goto/label lines and several call arguments are not
 * visible; verify against the upstream file.
 */
883 static void __devinit
pnv_pci_ioda_setup_dma_pe(struct pnv_phb
*phb
,
884 struct pnv_ioda_pe
*pe
,
889 struct page
*tce_mem
= NULL
;
890 const __be64
*swinvp
;
891 struct iommu_table
*tbl
;
896 /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
897 #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
899 /* XXX FIXME: Handle 64-bit only DMA devices */
900 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
901 /* XXX FIXME: Allocate multi-level tables on PHB3 */
903 /* We shouldn't already have a 32-bit DMA associated */
904 if (WARN_ON(pe
->tce32_seg
>= 0))
907 /* Grab a 32-bit TCE table */
908 pe
->tce32_seg
= base
;
909 pe_info(pe
, " Setting up 32-bit TCE table at %08x..%08x\n",
910 (base
<< 28), ((base
+ segs
) << 28) - 1);
912 /* XXX Currently, we allocate one big contiguous table for the
913 * TCEs. We only really need one chunk per 256M of TCE space
914 * (ie per segment) but that's an optimization for later, it
915 * requires some added smarts with our get/put_tce implementation
917 tce_mem
= alloc_pages_node(phb
->hose
->node
, GFP_KERNEL
,
918 get_order(TCE32_TABLE_SIZE
* segs
));
920 pe_err(pe
, " Failed to allocate a 32-bit TCE memory\n");
923 addr
= page_address(tce_mem
);
924 memset(addr
, 0, TCE32_TABLE_SIZE
* segs
);
927 for (i
= 0; i
< segs
; i
++) {
928 rc
= opal_pci_map_pe_dma_window(phb
->opal_id
,
931 __pa(addr
) + TCE32_TABLE_SIZE
* i
,
932 TCE32_TABLE_SIZE
, 0x1000);
934 pe_err(pe
, " Failed to configure 32-bit TCE table,"
940 /* Setup linux iommu table */
941 tbl
= &pe
->tce32_table
;
942 pnv_pci_setup_iommu_table(tbl
, addr
, TCE32_TABLE_SIZE
* segs
,
945 /* OPAL variant of P7IOC SW invalidated TCEs */
946 swinvp
= of_get_property(phb
->hose
->dn
, "ibm,opal-tce-kill", NULL
);
948 /* We need a couple more fields -- an address and a data
949 * to or. Since the bus is only printed out on table free
950 * errors, and on the first pass the data will be a relative
951 * bus number, print that out instead.
954 tbl
->it_index
= (unsigned long)ioremap(be64_to_cpup(swinvp
), 8);
955 tbl
->it_type
= TCE_PCI_SWINV_CREATE
| TCE_PCI_SWINV_FREE
956 | TCE_PCI_SWINV_PAIR
;
958 iommu_init_table(tbl
, phb
->hose
->node
);
961 set_iommu_table_base(&pe
->pdev
->dev
, tbl
);
963 pnv_ioda_setup_bus_dma(pe
, pe
->pbus
);
967 /* XXX Failure: Try to fallback to 64-bit only ? */
968 if (pe
->tce32_seg
>= 0)
971 __free_pages(tce_mem
, get_order(TCE32_TABLE_SIZE
* segs
));
/*
 * pnv_ioda_setup_dma - distribute the PHB's 32-bit TCE segments over PEs.
 *
 * `residual` is the number of segments left after giving each DMA-capable
 * PE one segment (tce32_count - dma_pe_count).  The PE list is walked
 * (it is ordered by weight); PEs without DMA weight are skipped, and each
 * remaining PE receives extra segments in proportion to its weight,
 * rounded to nearest: (dma_weight * residual + tw / 2) / tw, capped by
 * what remains.  pnv_pci_ioda_setup_dma_pe() then programs the window.
 *
 * NOTE(review): lossy listing - the residual = 0 case, the initial value
 * of `segs`, the updates of `base`/`remaining` and the `continue` lines
 * are not visible.
 */
974 static void __devinit
pnv_ioda_setup_dma(struct pnv_phb
*phb
)
976 struct pci_controller
*hose
= phb
->hose
;
977 unsigned int residual
, remaining
, segs
, tw
, base
;
978 struct pnv_ioda_pe
*pe
;
980 /* If we have more PE# than segments available, hand out one
981 * per PE until we run out and let the rest fail. If not,
982 * then we assign at least one segment per PE, plus more based
983 * on the amount of devices under that PE
985 if (phb
->ioda
.dma_pe_count
> phb
->ioda
.tce32_count
)
988 residual
= phb
->ioda
.tce32_count
-
989 phb
->ioda
.dma_pe_count
;
991 pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
992 hose
->global_number
, phb
->ioda
.tce32_count
);
993 pr_info("PCI: %d PE# for a total weight of %d\n",
994 phb
->ioda
.dma_pe_count
, phb
->ioda
.dma_weight
);
996 /* Walk our PE list and configure their DMA segments, hand them
997 * out one base segment plus any residual segments based on
1000 remaining
= phb
->ioda
.tce32_count
;
1001 tw
= phb
->ioda
.dma_weight
;
1003 list_for_each_entry(pe
, &phb
->ioda
.pe_list
, link
) {
1004 if (!pe
->dma_weight
)
1007 pe_warn(pe
, "No DMA32 resources available\n");
1012 segs
+= ((pe
->dma_weight
* residual
) + (tw
/ 2)) / tw
;
1013 if (segs
> remaining
)
1016 pe_info(pe
, "DMA weight %d, assigned %d DMA32 segments\n",
1017 pe
->dma_weight
, segs
);
1018 pnv_pci_ioda_setup_dma_pe(phb
, pe
, base
, segs
);
/*
 * pnv_pci_ioda_msi_setup - fill in an MSI message for one hardware IRQ.
 *
 * The XIVE number is hwirq - phb->msi_base.  After finding the device's
 * PE (searching up through bridges) and checking that it has an MVE, the
 * XIVE is bound to the PE with opal_pci_set_xive_pe().  The MSI
 * address/data pair is then fetched from OPAL: the 64-bit variant splits
 * addr64 into address_hi/address_lo, the 32-bit variant sets address_hi
 * to 0.  OPAL failures are logged with pr_warn().
 *
 * Only built when CONFIG_PCI_MSI is set.
 *
 * NOTE(review): lossy listing - the is_64 branch, error returns, the
 * assignment of msg->data and the final return are not visible.
 */
1024 #ifdef CONFIG_PCI_MSI
1025 static int pnv_pci_ioda_msi_setup(struct pnv_phb
*phb
, struct pci_dev
*dev
,
1026 unsigned int hwirq
, unsigned int is_64
,
1027 struct msi_msg
*msg
)
1029 struct pnv_ioda_pe
*pe
= pnv_ioda_get_pe(dev
);
1030 unsigned int xive_num
= hwirq
- phb
->msi_base
;
1032 uint32_t addr32
, data
;
1035 /* No PE assigned ? bail out ... no MSI for you ! */
1039 /* Check if we have an MVE */
1040 if (pe
->mve_number
< 0)
1043 /* Assign XIVE to PE */
1044 rc
= opal_pci_set_xive_pe(phb
->opal_id
, pe
->pe_number
, xive_num
);
1046 pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
1047 pci_name(dev
), rc
, xive_num
);
1052 rc
= opal_get_msi_64(phb
->opal_id
, pe
->mve_number
, xive_num
, 1,
1055 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
1059 msg
->address_hi
= addr64
>> 32;
1060 msg
->address_lo
= addr64
& 0xfffffffful
;
1062 rc
= opal_get_msi_32(phb
->opal_id
, pe
->mve_number
, xive_num
, 1,
1065 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
1069 msg
->address_hi
= 0;
1070 msg
->address_lo
= addr32
;
1074 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
1075 " address=%x_%08x data=%x PE# %d\n",
1076 pci_name(dev
), is_64
? "64" : "32", hwirq
, xive_num
,
1077 msg
->address_hi
, msg
->address_lo
, data
, pe
->pe_number
);
/*
 * pnv_pci_init_ioda_msis - set up MSI bookkeeping for a PHB.
 *
 * Reads the MSI range from the "ibm,opal-msi-ranges" property (a lookup
 * of "msi-ranges" is also present), takes base and count from its first
 * two cells, allocates a zeroed bitmap with one bit per MSI, and installs
 * pnv_pci_ioda_msi_setup as the PHB's MSI setup hook with 32-bit MSI
 * support flagged.
 *
 * The second definition at the end is the empty stub used when
 * CONFIG_PCI_MSI is off.
 *
 * NOTE(review): lossy listing - the property NULL checks, early returns
 * and the #else line separating the two definitions are not visible.
 */
1082 static void pnv_pci_init_ioda_msis(struct pnv_phb
*phb
)
1084 unsigned int bmap_size
;
1085 const __be32
*prop
= of_get_property(phb
->hose
->dn
,
1086 "ibm,opal-msi-ranges", NULL
);
1089 prop
= of_get_property(phb
->hose
->dn
, "msi-ranges", NULL
);
1094 phb
->msi_base
= be32_to_cpup(prop
);
1095 phb
->msi_count
= be32_to_cpup(prop
+ 1);
1096 bmap_size
= BITS_TO_LONGS(phb
->msi_count
) * sizeof(unsigned long);
1097 phb
->msi_map
= zalloc_maybe_bootmem(bmap_size
, GFP_KERNEL
);
1098 if (!phb
->msi_map
) {
1099 pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
1100 phb
->hose
->global_number
);
1103 phb
->msi_setup
= pnv_pci_ioda_msi_setup
;
1104 phb
->msi32_support
= 1;
1105 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
1106 phb
->msi_count
, phb
->msi_base
);
1109 static void pnv_pci_init_ioda_msis(struct pnv_phb
*phb
) { }
1110 #endif /* CONFIG_PCI_MSI */
/*
 * pnv_pci_ioda_fixup_phb - per-PHB fixup entry point (installed as
 * ppc_md.pcibios_fixup_phb elsewhere in this file).
 *
 * Order of operations: assign PEs, compute IO then MEM resource layout
 * from the root bus, apply the layout to hardware, set up DMA windows,
 * and finally configure PCI Express settings on each child bus of the
 * root.
 *
 * NOTE(review): the guard on `self` before pcie_bus_configure_settings()
 * is missing from this listing.
 */
1112 /* This is the starting point of our IODA specific resource
1113 * allocation process
1115 static void __devinit
pnv_pci_ioda_fixup_phb(struct pci_controller
*hose
)
1117 resource_size_t size
, align
;
1118 struct pci_bus
*child
;
1120 /* Associate PEs per functions */
1121 pnv_ioda_setup_PEs(hose
->bus
);
1123 /* Calculate all resources */
1124 pnv_ioda_calc_bus(hose
->bus
, IORESOURCE_IO
, &size
, &align
);
1125 pnv_ioda_calc_bus(hose
->bus
, IORESOURCE_MEM
, &size
, &align
);
1127 /* Apply them to HW */
1128 pnv_ioda_update_resources(hose
->bus
);
1131 pnv_ioda_setup_dma(hose
->private_data
);
1133 /* Configure PCI Express settings */
1134 list_for_each_entry(child
, &hose
->bus
->children
, node
) {
1135 struct pci_dev
*self
= child
->self
;
1138 pcie_bus_configure_settings(child
, self
->pcie_mpss
);
/*
 * pnv_pci_enable_device_hook - veto enabling of devices without a PE.
 *
 * The visible test singles out devices that have no pci_dn or whose
 * pe_number is IODA_INVALID_PE.
 *
 * NOTE(review): both return statements are missing from this listing,
 * so the exact return convention must be taken from the upstream file.
 */
1142 /* Prevent enabling devices for which we couldn't properly
1145 static int __devinit
pnv_pci_enable_device_hook(struct pci_dev
*dev
)
1147 struct pci_dn
*pdn
= pnv_ioda_get_pdn(dev
);
1149 if (!pdn
|| pdn
->pe_number
== IODA_INVALID_PE
)
/*
 * pnv_ioda_bdfn_to_pe - reverse lookup from bus/devfn to PE number.
 *
 * Returns phb->ioda.pe_rmap[(bus->number << 8) | devfn], i.e. the entry
 * filled in by the "reverse map" loop of pnv_ioda_configure_pe().
 *
 * NOTE(review): the `devfn` parameter line is missing from this listing.
 */
1154 static u32
pnv_ioda_bdfn_to_pe(struct pnv_phb
*phb
, struct pci_bus
*bus
,
1157 return phb
->ioda
.pe_rmap
[(bus
->number
<< 8) | devfn
];
/*
 * pnv_pci_init_ioda1_phb - probe and initialise one IODA1 PHB from its
 * device-tree node.
 *
 * Visible flow:
 *   - read the PHB id from "ibm,opal-phbid";
 *   - allocate the pnv_phb from bootmem and its pci_controller;
 *   - record bus range 0..0xff, OPAL id, type PNV_PHB_IODA1 and model;
 *   - parse "ranges" and map the register block;
 *   - fix total_pe at 128 and derive M32/IO sizes and per-PE segment
 *     sizes from it (64k is added back to the M32 size);
 *   - carve one bootmem allocation into the PE allocation bitmap, the
 *     M32 and IO segment maps and the PE array, and reserve PE#0;
 *   - compute tce32_count as m32_pci_base >> 28 (256M segments);
 *   - clear mem_resources[1] and [2];
 *   - install PCI ops, the RID-to-PE and DMA setup hooks, MSI support and
 *     the ppc_md fixup/enable hooks, and set PCI_PROBE_ONLY together with
 *     PCI_REASSIGN_ALL_RSRC;
 *   - reset the IODA tables through OPAL and map PE#0.
 *
 * NOTE(review): lossy listing - NULL checks, early returns, the offsets
 * m32map_off/iomap_off/pemap_off and several call arguments are not
 * visible.  The opal_pci_set_phb_mem_window(opal->phb_id, ...) fragment
 * references names not declared in any visible line - presumably it sits
 * in a disabled (#if 0) region upstream; confirm before building.
 */
1160 void __init
pnv_pci_init_ioda1_phb(struct device_node
*np
)
1162 struct pci_controller
*hose
;
1163 static int primary
= 1;
1164 struct pnv_phb
*phb
;
1165 unsigned long size
, m32map_off
, iomap_off
, pemap_off
;
1171 pr_info(" Initializing IODA OPAL PHB %s\n", np
->full_name
);
1173 prop64
= of_get_property(np
, "ibm,opal-phbid", NULL
);
1175 pr_err(" Missing \"ibm,opal-phbid\" property !\n");
1178 phb_id
= be64_to_cpup(prop64
);
1179 pr_debug(" PHB-ID : 0x%016llx\n", phb_id
);
1181 phb
= alloc_bootmem(sizeof(struct pnv_phb
));
1183 memset(phb
, 0, sizeof(struct pnv_phb
));
1184 phb
->hose
= hose
= pcibios_alloc_controller(np
);
1186 if (!phb
|| !phb
->hose
) {
1187 pr_err("PCI: Failed to allocate PCI controller for %s\n",
1192 spin_lock_init(&phb
->lock
);
1193 /* XXX Use device-tree */
1194 hose
->first_busno
= 0;
1195 hose
->last_busno
= 0xff;
1196 hose
->private_data
= phb
;
1197 phb
->opal_id
= phb_id
;
1198 phb
->type
= PNV_PHB_IODA1
;
1200 /* Detect specific models for error handling */
1201 if (of_device_is_compatible(np
, "ibm,p7ioc-pciex"))
1202 phb
->model
= PNV_PHB_MODEL_P7IOC
;
1204 phb
->model
= PNV_PHB_MODEL_UNKNOWN
;
1206 /* We parse "ranges" now since we need to deduce the register base
1209 pci_process_bridge_OF_ranges(phb
->hose
, np
, primary
);
1212 /* Magic formula from Milton */
1213 phb
->regs
= of_iomap(np
, 0);
1214 if (phb
->regs
== NULL
)
1215 pr_err(" Failed to map registers !\n");
1218 /* XXX This is hack-a-thon. This needs to be changed so that:
1219 * - we obtain stuff like PE# etc... from device-tree
1220 * - we properly re-allocate M32 ourselves
1221 * (the OFW one isn't very good)
1224 /* Initialize more IODA stuff */
1225 phb
->ioda
.total_pe
= 128;
1227 phb
->ioda
.m32_size
= resource_size(&hose
->mem_resources
[0]);
1228 /* OFW has already cut the top 64k off the M32 space (MSI space) */
1229 phb
->ioda
.m32_size
+= 0x10000;
1231 phb
->ioda
.m32_segsize
= phb
->ioda
.m32_size
/ phb
->ioda
.total_pe
;
1232 phb
->ioda
.m32_pci_base
= hose
->mem_resources
[0].start
-
1233 hose
->pci_mem_offset
;
1234 phb
->ioda
.io_size
= hose
->pci_io_size
;
1235 phb
->ioda
.io_segsize
= phb
->ioda
.io_size
/ phb
->ioda
.total_pe
;
1236 phb
->ioda
.io_pci_base
= 0; /* XXX calculate this ? */
1238 /* Allocate aux data & arrays */
1239 size
= _ALIGN_UP(phb
->ioda
.total_pe
/ 8, sizeof(unsigned long));
1241 size
+= phb
->ioda
.total_pe
;
1243 size
+= phb
->ioda
.total_pe
;
1245 size
+= phb
->ioda
.total_pe
* sizeof(struct pnv_ioda_pe
);
1246 aux
= alloc_bootmem(size
);
1247 memset(aux
, 0, size
);
1248 phb
->ioda
.pe_alloc
= aux
;
1249 phb
->ioda
.m32_segmap
= aux
+ m32map_off
;
1250 phb
->ioda
.io_segmap
= aux
+ iomap_off
;
1251 phb
->ioda
.pe_array
= aux
+ pemap_off
;
1252 set_bit(0, phb
->ioda
.pe_alloc
);
1254 INIT_LIST_HEAD(&phb
->ioda
.pe_list
);
1256 /* Calculate how many 32-bit TCE segments we have */
1257 phb
->ioda
.tce32_count
= phb
->ioda
.m32_pci_base
>> 28;
1259 /* Clear unusable m64 */
1260 hose
->mem_resources
[1].flags
= 0;
1261 hose
->mem_resources
[1].start
= 0;
1262 hose
->mem_resources
[1].end
= 0;
1263 hose
->mem_resources
[2].flags
= 0;
1264 hose
->mem_resources
[2].start
= 0;
1265 hose
->mem_resources
[2].end
= 0;
1268 rc
= opal_pci_set_phb_mem_window(opal
->phb_id
,
1271 starting_real_address
,
1272 starting_pci_address
,
1276 pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
1278 phb
->ioda
.m32_size
, phb
->ioda
.m32_segsize
,
1279 phb
->ioda
.io_size
, phb
->ioda
.io_segsize
);
1282 pr_devel(" BUID = 0x%016llx\n", in_be64(phb
->regs
+ 0x100));
1283 pr_devel(" PHB2_CR = 0x%016llx\n", in_be64(phb
->regs
+ 0x160));
1284 pr_devel(" IO_BAR = 0x%016llx\n", in_be64(phb
->regs
+ 0x170));
1285 pr_devel(" IO_BAMR = 0x%016llx\n", in_be64(phb
->regs
+ 0x178));
1286 pr_devel(" IO_SAR = 0x%016llx\n", in_be64(phb
->regs
+ 0x180));
1287 pr_devel(" M32_BAR = 0x%016llx\n", in_be64(phb
->regs
+ 0x190));
1288 pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb
->regs
+ 0x198));
1289 pr_devel(" M32_SAR = 0x%016llx\n", in_be64(phb
->regs
+ 0x1a0));
1291 phb
->hose
->ops
= &pnv_pci_ops
;
1293 /* Setup RID -> PE mapping function */
1294 phb
->bdfn_to_pe
= pnv_ioda_bdfn_to_pe
;
1297 phb
->dma_dev_setup
= pnv_pci_ioda_dma_dev_setup
;
1299 /* Setup MSI support */
1300 pnv_pci_init_ioda_msis(phb
);
1302 /* We set both PCI_PROBE_ONLY and PCI_REASSIGN_ALL_RSRC. This is an
1303 * odd combination which essentially means that we skip all resource
1304 * fixups and assignments in the generic code, and do it all
1307 ppc_md
.pcibios_fixup_phb
= pnv_pci_ioda_fixup_phb
;
1308 ppc_md
.pcibios_enable_device_hook
= pnv_pci_enable_device_hook
;
1309 pci_add_flags(PCI_PROBE_ONLY
| PCI_REASSIGN_ALL_RSRC
);
1311 /* Reset IODA tables to a clean state */
1312 rc
= opal_pci_reset(phb_id
, OPAL_PCI_IODA_TABLE_RESET
, OPAL_ASSERT_RESET
);
1314 pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc
);
1315 opal_pci_set_pe(phb_id
, 0, 0, 7, 1, 1 , OPAL_MAP_PE
);
/*
 * pnv_pci_init_ioda_hub - probe an IODA IO hub node.
 *
 * Reads the hub id from "ibm,opal-hubid", then walks the node's children
 * and calls pnv_pci_init_ioda1_phb() for each child compatible with
 * "ibm,ioda-phb".
 *
 * NOTE(review): the NULL check on the property, its early return and the
 * closing braces are missing from this listing.
 */
1318 void __init
pnv_pci_init_ioda_hub(struct device_node
*np
)
1320 struct device_node
*phbn
;
1324 pr_info("Probing IODA IO-Hub %s\n", np
->full_name
);
1326 prop64
= of_get_property(np
, "ibm,opal-hubid", NULL
);
1328 pr_err(" Missing \"ibm,opal-hubid\" property !\n");
1331 hub_id
= be64_to_cpup(prop64
);
1332 pr_devel(" HUB-ID : 0x%016llx\n", hub_id
);
1334 /* Count child PHBs */
1335 for_each_child_of_node(np
, phbn
) {
1336 /* Look for IODA1 PHBs */
1337 if (of_device_is_compatible(phbn
, "ibm,ioda-phb"))
1338 pnv_pci_init_ioda1_phb(phbn
);