// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Rewrite, cleanup:
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
 *
 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/tce.h>
#include <asm/ppc-pci.h>
#include <asm/udbg.h>
#include <asm/mmzone.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"
enum {
	DDW_QUERY_PE_DMA_WIN  = 0,
	DDW_CREATE_PE_DMA_WIN = 1,
	DDW_REMOVE_PE_DMA_WIN = 2,

	DDW_APPLICABLE_SIZE
};

enum {
	DDW_EXT_SIZE = 0,
	DDW_EXT_RESET_DMA_WIN = 1,
	DDW_EXT_QUERY_OUT_SIZE = 2
};
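
/*
 * These enums index cells of two device-tree properties consumed below:
 * "ibm,ddw-applicable" holds the RTAS tokens for the query/create/remove
 * calls, and "ibm,ddw-extensions" (LoPAR 2.7+) holds the extension count
 * in cell 0 followed by per-extension values. A typical read (as done in
 * remove_ddw() and enable_ddw()):
 *
 *	u32 ddw_avail[DDW_APPLICABLE_SIZE];
 *
 *	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
 *					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
 */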

static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
	struct iommu_table_group *table_group;
	struct iommu_table *tbl;

	table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
			   node);
	if (!table_group)
		return NULL;

	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
	if (!tbl)
		goto free_group;

	INIT_LIST_HEAD_RCU(&tbl->it_group_list);
	kref_init(&tbl->it_kref);

	table_group->tables[0] = tbl;

	return table_group;

free_group:
	kfree(table_group);
	return NULL;
}

static void iommu_pseries_free_group(struct iommu_table_group *table_group,
		const char *node_name)
{
	struct iommu_table *tbl;

	if (!table_group)
		return;

	tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
	if (table_group->group) {
		iommu_group_put(table_group->group);
		BUG_ON(table_group->group);
	}
#endif
	iommu_tce_table_put(tbl);

	kfree(table_group);
}

static int tce_build_pSeries(struct iommu_table *tbl, long index,
			      long npages, unsigned long uaddr,
			      enum dma_data_direction direction,
			      unsigned long attrs)
{
	u64 proto_tce;
	__be64 *tcep;
	u64 rpn;

	proto_tce = TCE_PCI_READ; // Read allowed

	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	tcep = ((__be64 *)tbl->it_base) + index;

	while (npages--) {
		/* can't move this out since we might cross MEMBLOCK boundary */
		rpn = __pa(uaddr) >> TCE_SHIFT;
		*tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);

		uaddr += TCE_PAGE_SIZE;
		tcep++;
	}
	return 0;
}
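
/*
 * Worked example (illustrative, assuming 4K TCE pages, TCE_SHIFT == 12):
 * mapping a buffer at physical address 0x20001000 for DMA_FROM_DEVICE gives
 * proto_tce = TCE_PCI_READ | TCE_PCI_WRITE and rpn = 0x20001, and the slot
 * at 'index' receives cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) <<
 * TCE_RPN_SHIFT). No hypervisor call is involved: tbl->it_base points
 * directly at the TCE table, which is why this variant is only used on
 * non-LPAR systems.
 */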

static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
	__be64 *tcep;

	tcep = ((__be64 *)tbl->it_base) + index;

	while (npages--)
		*(tcep++) = 0;
}

static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
{
	__be64 *tcep;

	tcep = ((__be64 *)tbl->it_base) + index;

	return be64_to_cpu(*tcep);
}

static void tce_free_pSeriesLP(unsigned long liobn, long, long);
static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);

static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
				long npages, unsigned long uaddr,
				enum dma_data_direction direction,
				unsigned long attrs)
{
	u64 rc = 0;
	u64 proto_tce, tce;
	u64 rpn;
	int ret = 0;
	long tcenum_start = tcenum, npages_start = npages;

	rpn = __pa(uaddr) >> tceshift;
	proto_tce = TCE_PCI_READ;
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	while (npages--) {
		tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);

		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
			ret = (int)rc;
			tce_free_pSeriesLP(liobn, tcenum_start,
			                   (npages_start - (npages + 1)));
			break;
		}

		if (rc && printk_ratelimit()) {
			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
			printk("\tindex   = 0x%llx\n", (u64)liobn);
			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
			printk("\ttce val = 0x%llx\n", tce);
			dump_stack();
		}

		tcenum++;
		rpn++;
	}
	return ret;
}

static DEFINE_PER_CPU(__be64 *, tce_page);
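
/*
 * Each CPU lazily allocates one page here to stage a batch of TCEs for
 * H_PUT_TCE_INDIRECT (up to 4096/TCE_ENTRY_SIZE entries per hcall). The
 * page is never freed; callers must run with interrupts disabled while
 * using it, which is what the local_irq_save()/local_irq_disable() calls
 * around its users enforce.
 */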

static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
				     long npages, unsigned long uaddr,
				     enum dma_data_direction direction,
				     unsigned long attrs)
{
	u64 rc = 0;
	u64 proto_tce;
	__be64 *tcep;
	u64 rpn;
	long l, limit;
	long tcenum_start = tcenum, npages_start = npages;
	int ret = 0;
	unsigned long flags;

	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
		return tce_build_pSeriesLP(tbl->it_index, tcenum,
					   tbl->it_page_shift, npages, uaddr,
					   direction, attrs);
	}

	local_irq_save(flags);	/* to protect tcep and the page behind it */

	tcep = __this_cpu_read(tce_page);

	/* This is safe to do since interrupts are off when we're called
	 * from iommu_alloc{,_sg}()
	 */
	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		/* If allocation fails, fall back to the loop implementation */
		if (!tcep) {
			local_irq_restore(flags);
			return tce_build_pSeriesLP(tbl->it_index, tcenum,
					tbl->it_page_shift,
					npages, uaddr, direction, attrs);
		}
		__this_cpu_write(tce_page, tcep);
	}

	rpn = __pa(uaddr) >> TCE_SHIFT;
	proto_tce = TCE_PCI_READ;
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	/* We can map max one pageful of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
			rpn++;
		}

		rc = plpar_tce_put_indirect((u64)tbl->it_index,
					    (u64)tcenum << 12,
					    (u64)__pa(tcep),
					    limit);

		npages -= limit;
		tcenum += limit;
	} while (npages > 0 && !rc);

	local_irq_restore(flags);

	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
		ret = (int)rc;
		tce_freemulti_pSeriesLP(tbl, tcenum_start,
		                        (npages_start - (npages + limit)));
		return ret;
	}

	if (rc && printk_ratelimit()) {
		printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
		printk("\tnpages  = 0x%llx\n", (u64)npages);
		printk("\ttce[0] val = 0x%llx\n", tcep[0]);
		dump_stack();
	}
	return ret;
}
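
/*
 * Rollback arithmetic above: when the hypervisor rejects a batch with
 * H_NOT_ENOUGH_RESOURCES, npages has already been decremented for the
 * failed batch of 'limit' entries, so (npages_start - (npages + limit))
 * counts exactly the TCEs accepted in earlier iterations - those are the
 * ones that must be freed before failing the request.
 */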

static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
{
	u64 rc;

	while (npages--) {
		rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0);

		if (rc && printk_ratelimit()) {
			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
			printk("\tindex   = 0x%llx\n", (u64)liobn);
			printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
			dump_stack();
		}

		tcenum++;
	}
}

static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
	u64 rc;

	if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
		return tce_free_pSeriesLP(tbl->it_index, tcenum, npages);

	rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);

	if (rc && printk_ratelimit()) {
		printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
		printk("\trc      = %lld\n", rc);
		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
		printk("\tnpages  = 0x%llx\n", (u64)npages);
		dump_stack();
	}
}

static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
{
	u64 rc;
	unsigned long tce_ret;

	rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);

	if (rc && printk_ratelimit()) {
		printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
		printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
		printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
		dump_stack();
	}

	return tce_ret;
}

/* this is compatible with cells for the device tree property */
struct dynamic_dma_window_prop {
	__be32	liobn;		/* tce table number */
	__be64	dma_base;	/* address hi,lo */
	__be32	tce_shift;	/* ilog2(tce_page_size) */
	__be32	window_shift;	/* ilog2(tce_window_size) */
};

struct direct_window {
	struct device_node *device;
	const struct dynamic_dma_window_prop *prop;
	struct list_head list;
};

/* Dynamic DMA Window support */
struct ddw_query_response {
	u32 windows_available;
	u64 largest_available_block;
	u32 page_size;
	u32 migration_capable;
};

struct ddw_create_response {
	u32 liobn;
	u32 addr_hi;
	u32 addr_lo;
};

static LIST_HEAD(direct_window_list);
/* prevents races between memory on/offline and window creation */
static DEFINE_SPINLOCK(direct_window_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(direct_window_init_mutex);
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
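
/*
 * DIRECT64_PROPNAME records a created 64-bit direct window in the device
 * tree: enable_ddw() attaches it to the PE node, and at boot
 * find_existing_ddw_windows() re-populates direct_window_list from any
 * instances already present (e.g. left behind across kexec), so windows
 * are reused rather than recreated.
 */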

static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
					unsigned long num_pfn, const void *arg)
{
	const struct dynamic_dma_window_prop *maprange = arg;
	int rc;
	u64 tce_size, num_tce, dma_offset, next;
	u32 tce_shift;
	long limit;

	tce_shift = be32_to_cpu(maprange->tce_shift);
	tce_size = 1ULL << tce_shift;
	next = start_pfn << PAGE_SHIFT;
	num_tce = num_pfn << PAGE_SHIFT;

	/* round back to the beginning of the tce page size */
	num_tce += next & (tce_size - 1);
	next &= ~(tce_size - 1);

	/* convert to number of tces */
	num_tce |= tce_size - 1;
	num_tce >>= tce_shift;

	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, num_tce, 512);
		dma_offset = next + be64_to_cpu(maprange->dma_base);

		rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
					     dma_offset,
					     0, limit);
		next += limit * tce_size;
		num_tce -= limit;
	} while (num_tce > 0 && !rc);

	return rc;
}

static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
					unsigned long num_pfn, const void *arg)
{
	const struct dynamic_dma_window_prop *maprange = arg;
	u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
	__be64 *tcep;
	u32 tce_shift;
	u64 rc = 0;
	long l, limit;

	if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
				be64_to_cpu(maprange->dma_base);
		unsigned long tcenum = dmastart >> tceshift;
		unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
		void *uaddr = __va(start_pfn << PAGE_SHIFT);

		return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
				tcenum, tceshift, npages, (unsigned long) uaddr,
				DMA_BIDIRECTIONAL, 0);
	}

	local_irq_disable();	/* to protect tcep and the page behind it */
	tcep = __this_cpu_read(tce_page);

	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		if (!tcep) {
			local_irq_enable();
			return -ENOMEM;
		}
		__this_cpu_write(tce_page, tcep);
	}

	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;

	liobn = (u64)be32_to_cpu(maprange->liobn);
	tce_shift = be32_to_cpu(maprange->tce_shift);
	tce_size = 1ULL << tce_shift;
	next = start_pfn << PAGE_SHIFT;
	num_tce = num_pfn << PAGE_SHIFT;

	/* round back to the beginning of the tce page size */
	num_tce += next & (tce_size - 1);
	next &= ~(tce_size - 1);

	/* convert to number of tces */
	num_tce |= tce_size - 1;
	num_tce >>= tce_shift;

	/* We can map max one pageful of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
		dma_offset = next + be64_to_cpu(maprange->dma_base);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | next);
			next += tce_size;
		}

		rc = plpar_tce_put_indirect(liobn,
					    dma_offset,
					    (u64)__pa(tcep),
					    limit);

		num_tce -= limit;
	} while (num_tce > 0 && !rc);

	/* error cleanup: caller will clear whole range */

	local_irq_enable();
	return rc;
}
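
/*
 * tce_setrange_multi_pSeriesLP() and its tce_clearrange_multi_pSeriesLP()
 * counterpart above are also the memory-hotplug hooks: iommu_mem_notifier()
 * below invokes them for every registered direct window when a block of
 * RAM goes online or its onlining is cancelled.
 */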

static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}

static void iommu_table_setparms(struct pci_controller *phb,
				 struct device_node *dn,
				 struct iommu_table *tbl)
{
	struct device_node *node;
	const unsigned long *basep;
	const u32 *sizep;

	node = phb->dn;

	basep = of_get_property(node, "linux,tce-base", NULL);
	sizep = of_get_property(node, "linux,tce-size", NULL);
	if (basep == NULL || sizep == NULL) {
		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
				"missing tce entries !\n", dn);
		return;
	}

	tbl->it_base = (unsigned long)__va(*basep);

	if (!is_kdump_kernel())
		memset((void *)tbl->it_base, 0, *sizep);

	tbl->it_busno = phb->bus->number;
	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;

	/* Units of tce entries */
	tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;

	/* Test if we are going over 2GB of DMA space */
	if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
		udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
	}

	phb->dma_window_base_cur += phb->dma_window_size;

	/* Set the tce table size - measured in entries */
	tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;

	tbl->it_index = 0;
	tbl->it_blocksize = 16;
	tbl->it_type = TCE_PCI;
}

/*
 * iommu_table_setparms_lpar
 *
 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
 */
static void iommu_table_setparms_lpar(struct pci_controller *phb,
				      struct device_node *dn,
				      struct iommu_table *tbl,
				      struct iommu_table_group *table_group,
				      const __be32 *dma_window)
{
	unsigned long offset, size;

	of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);

	tbl->it_busno = phb->bus->number;
	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
	tbl->it_base   = 0;
	tbl->it_blocksize  = 16;
	tbl->it_type = TCE_PCI;
	tbl->it_offset = offset >> tbl->it_page_shift;
	tbl->it_size = size >> tbl->it_page_shift;

	table_group->tce32_start = offset;
	table_group->tce32_size = size;
}

struct iommu_table_ops iommu_table_pseries_ops = {
	.set = tce_build_pSeries,
	.clear = tce_free_pSeries,
	.get = tce_get_pseries
};

static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
{
	struct device_node *dn;
	struct iommu_table *tbl;
	struct device_node *isa_dn, *isa_dn_orig;
	struct device_node *tmp;
	struct pci_dn *pci;
	int children;

	dn = pci_bus_to_OF_node(bus);

	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);

	if (bus->self) {
		/* This is not a root bus, any setup will be done for the
		 * device-side of the bridge in iommu_dev_setup_pSeries().
		 */
		return;
	}
	pci = PCI_DN(dn);

	/* Check if the ISA bus on the system is under
	 * this PHB.
	 */
	isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");

	while (isa_dn && isa_dn != dn)
		isa_dn = isa_dn->parent;

	of_node_put(isa_dn_orig);

	/* Count number of direct PCI children of the PHB. */
	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
		children++;

	pr_debug("Children: %d\n", children);

	/* Calculate amount of DMA window per slot. Each window must be
	 * a power of two (due to pci_alloc_consistent requirements).
	 *
	 * Keep 256MB aside for PHBs with ISA.
	 */

	if (!isa_dn) {
		/* No ISA/IDE - just set window size and return */
		pci->phb->dma_window_size = 0x80000000ul; /* To be divided */

		while (pci->phb->dma_window_size * children > 0x80000000ul)
			pci->phb->dma_window_size >>= 1;
		pr_debug("No ISA/IDE, window size is 0x%llx\n",
			 pci->phb->dma_window_size);
		pci->phb->dma_window_base_cur = 0;

		return;
	}

	/* If we have ISA, then we probably have an IDE
	 * controller too. Allocate a 128MB table but
	 * skip the first 128MB to avoid stepping on ISA
	 * space.
	 */
	pci->phb->dma_window_size = 0x8000000ul;
	pci->phb->dma_window_base_cur = 0x8000000ul;

	pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
	tbl = pci->table_group->tables[0];

	iommu_table_setparms(pci->phb, dn, tbl);
	tbl->it_ops = &iommu_table_pseries_ops;
	iommu_init_table(tbl, pci->phb->node, 0, 0);

	/* Divide the rest (1.75GB) among the children */
	pci->phb->dma_window_size = 0x80000000ul;
	while (pci->phb->dma_window_size * children > 0x70000000ul)
		pci->phb->dma_window_size >>= 1;

	pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
}
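
/*
 * Worked example for the division above (illustrative): with no ISA bus
 * and four PHB children, 0x80000000 is halved until 4 * size <= 2GB,
 * leaving each child a 512MB (0x20000000) power-of-two window starting at
 * dma_window_base_cur == 0.
 */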

#ifdef CONFIG_IOMMU_API
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction,
				bool realmode)
{
	long rc;
	unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	unsigned long flags, oldtce = 0;
	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
	unsigned long newtce = *tce | proto_tce;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
	if (!rc)
		rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);

	if (!rc) {
		*direction = iommu_tce_direction(oldtce);
		*tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	}

	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return rc;
}
#endif

struct iommu_table_ops iommu_table_lpar_multi_ops = {
	.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
	.xchg_no_kill = tce_exchange_pseries,
#endif
	.clear = tce_freemulti_pSeriesLP,
	.get = tce_get_pSeriesLP
};

static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
	struct iommu_table *tbl;
	struct device_node *dn, *pdn;
	struct pci_dn *ppci;
	const __be32 *dma_window = NULL;

	dn = pci_bus_to_OF_node(bus);

	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
		 dn);

	/* Find nearest ibm,dma-window, walking up the device tree */
	for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
		if (dma_window != NULL)
			break;
	}

	if (dma_window == NULL) {
		pr_debug("  no ibm,dma-window property !\n");
		return;
	}

	ppci = PCI_DN(pdn);

	pr_debug("  parent is %pOF, iommu_table: 0x%p\n",
		 pdn, ppci->table_group);

	if (!ppci->table_group) {
		ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
		tbl = ppci->table_group->tables[0];
		iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
				ppci->table_group, dma_window);
		tbl->it_ops = &iommu_table_lpar_multi_ops;
		iommu_init_table(tbl, ppci->phb->node, 0, 0);
		iommu_register_group(ppci->table_group,
				pci_domain_nr(bus), 0);
		pr_debug("  created table: %p\n", ppci->table_group);
	}
}

static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
{
	struct device_node *dn;
	struct iommu_table *tbl;

	pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));

	dn = dev->dev.of_node;

	/* If we're the direct child of a root bus, then we need to allocate
	 * an iommu table ourselves. The bus setup code should have setup
	 * the window sizes already.
	 */
	if (!dev->bus->self) {
		struct pci_controller *phb = PCI_DN(dn)->phb;

		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
		PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
		tbl = PCI_DN(dn)->table_group->tables[0];
		iommu_table_setparms(phb, dn, tbl);
		tbl->it_ops = &iommu_table_pseries_ops;
		iommu_init_table(tbl, phb->node, 0, 0);
		set_iommu_table_base(&dev->dev, tbl);
		return;
	}

	/* If this device is further down the bus tree, search upwards until
	 * an already allocated iommu table is found and use that.
	 */

	while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
		dn = dn->parent;

	if (dn && PCI_DN(dn))
		set_iommu_table_base(&dev->dev,
				PCI_DN(dn)->table_group->tables[0]);
	else
		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
		       pci_name(dev));
}

static int __read_mostly disable_ddw;

static int __init disable_ddw_setup(char *str)
{
	disable_ddw = 1;
	printk(KERN_INFO "ppc iommu: disabling ddw.\n");

	return 0;
}

early_param("disable_ddw", disable_ddw_setup);
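
/*
 * Booting with "disable_ddw" on the kernel command line leaves
 * iommu_bypass_supported unset in iommu_init_early_pSeries(), so devices
 * stay on the default 32-bit TCE window instead of getting a 64-bit
 * direct-mapped window.
 */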

static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
			      struct property *win)
{
	struct dynamic_dma_window_prop *dwp;
	u64 liobn;
	int ret;

	dwp = win->value;
	liobn = (u64)be32_to_cpu(dwp->liobn);

	/* clear the whole window, note the arg is in kernel pages */
	ret = tce_clearrange_multi_pSeriesLP(0,
		1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
	if (ret)
		pr_warn("%pOF failed to clear tces in window.\n",
			np);
	else
		pr_debug("%pOF successfully cleared tces in window.\n",
			 np);

	ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
	if (ret)
		pr_warn("%pOF: failed to remove direct window: rtas returned "
			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
	else
		pr_debug("%pOF: successfully removed direct window: rtas returned "
			 "%d to ibm,remove-pe-dma-window(%x) %llx\n",
			 np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
}

static void remove_ddw(struct device_node *np, bool remove_prop)
{
	struct property *win;
	u32 ddw_avail[DDW_APPLICABLE_SIZE];
	int ret = 0;

	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
	if (ret)
		return;

	win = of_find_property(np, DIRECT64_PROPNAME, NULL);
	if (!win)
		return;

	if (win->length >= sizeof(struct dynamic_dma_window_prop))
		remove_dma_window(np, ddw_avail, win);

	if (!remove_prop)
		return;

	ret = of_remove_property(np, win);
	if (ret)
		pr_warn("%pOF: failed to remove direct window property: %d\n",
			np, ret);
}

static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
{
	struct direct_window *window;
	const struct dynamic_dma_window_prop *direct64;
	u64 dma_addr = 0;

	spin_lock(&direct_window_list_lock);
	/* check if we already created a window and dupe that config if so */
	list_for_each_entry(window, &direct_window_list, list) {
		if (window->device == pdn) {
			direct64 = window->prop;
			dma_addr = be64_to_cpu(direct64->dma_base);
			*window_shift = be32_to_cpu(direct64->window_shift);
			break;
		}
	}
	spin_unlock(&direct_window_list_lock);

	return dma_addr;
}

static int find_existing_ddw_windows(void)
{
	int len;
	struct device_node *pdn;
	struct direct_window *window;
	const struct dynamic_dma_window_prop *direct64;

	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;

	for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
		if (!direct64)
			continue;

		window = kzalloc(sizeof(*window), GFP_KERNEL);
		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
			kfree(window);
			remove_ddw(pdn, true);
			continue;
		}

		window->device = pdn;
		window->prop = direct64;
		spin_lock(&direct_window_list_lock);
		list_add(&window->list, &direct_window_list);
		spin_unlock(&direct_window_list_lock);
	}

	return 0;
}
machine_arch_initcall(pseries, find_existing_ddw_windows);

/**
 * ddw_read_ext - Get the value of a DDW extension
 * @np:		device node from which the extension value is to be read.
 * @extnum:	index number of the extension.
 * @value:	pointer to return value, modified when extension is available.
 *
 * Checks if "ibm,ddw-extensions" exists for this node, and gets the value
 * at index 'extnum'.
 * It can be used only to check if a property exists, passing value == NULL.
 *
 * Returns:
 *	0 if extension successfully read
 *	-EINVAL if the "ibm,ddw-extensions" does not exist,
 *	-ENODATA if "ibm,ddw-extensions" does not have a value, and
 *	-EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
 */
static inline int ddw_read_ext(const struct device_node *np, int extnum,
			       u32 *value)
{
	static const char propname[] = "ibm,ddw-extensions";
	u32 count;
	int ret;

	/* Get the total number of extensions */
	ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
	if (ret)
		return ret;

	/* Validate the number of extensions */
	if (count < extnum)
		return -EOVERFLOW;

	if (!value)
		value = &count;

	return of_property_read_u32_index(np, propname, extnum, value);
}
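
/*
 * Typical use (as in reset_dma_window() below): fetch the RTAS token for
 * ibm,reset-pe-dma-windows and bail out quietly on platforms that predate
 * the extension:
 *
 *	ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
 *	if (ret)
 *		return;
 */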

static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
		     struct ddw_query_response *query,
		     struct device_node *parent)
{
	struct device_node *dn;
	struct pci_dn *pdn;
	u32 cfg_addr, ext_query, query_out[5];
	u64 buid;
	int ret, out_sz;

	/*
	 * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
	 * output parameters ibm,query-pe-dma-windows will have, ranging from
	 * 5 to 6.
	 */
	ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
	if (!ret && ext_query == 1)
		out_sz = 6;
	else
		out_sz = 5;

	/*
	 * Get the config address and phb buid of the PE window.
	 * Rely on eeh to retrieve this for us.
	 * Retrieve them from the pci device, not the node with the
	 * dma-window property
	 */
	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

	ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
			cfg_addr, BUID_HI(buid), BUID_LO(buid));
	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
		 ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
		 BUID_LO(buid), ret);

	switch (out_sz) {
	case 5:
		query->windows_available = query_out[0];
		query->largest_available_block = query_out[1];
		query->page_size = query_out[2];
		query->migration_capable = query_out[3];
		break;
	case 6:
		query->windows_available = query_out[0];
		query->largest_available_block = ((u64)query_out[1] << 32) |
						 query_out[2];
		query->page_size = query_out[3];
		query->migration_capable = query_out[4];
		break;
	}

	return ret;
}
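
/*
 * Note that rtas_call()'s nret counts the status word, so out_sz == 6
 * still fits query_out[5]: the status is returned separately and only the
 * remaining five cells are copied out. The 6-output layout splits
 * largest_available_block into high/low 32-bit halves, hence the
 * reassembly with a shift in the out_sz == 6 case.
 */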

static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
			struct ddw_create_response *create, int page_shift,
			int window_shift)
{
	struct device_node *dn;
	struct pci_dn *pdn;
	u32 cfg_addr;
	u64 buid;
	int ret;

	/*
	 * Get the config address and phb buid of the PE window.
	 * Rely on eeh to retrieve this for us.
	 * Retrieve them from the pci device, not the node with the
	 * dma-window property
	 */
	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

	do {
		/* extra outputs are LIOBN and dma-addr (hi, lo) */
		ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
				(u32 *)create, cfg_addr, BUID_HI(buid),
				BUID_LO(buid), page_shift, window_shift);
	} while (rtas_busy_delay(ret));
	dev_info(&dev->dev,
		 "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
		 "(liobn = 0x%x starting addr = %x %x)\n",
		 ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
		 BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
		 create->addr_hi, create->addr_lo);

	return ret;
}

struct failed_ddw_pdn {
	struct device_node *pdn;
	struct list_head list;
};

static LIST_HEAD(failed_ddw_pdn_list);

static phys_addr_t ddw_memory_hotplug_max(void)
{
	phys_addr_t max_addr = memory_hotplug_max();
	struct device_node *memory;

	/*
	 * The "ibm,pmemory" can appear anywhere in the address space.
	 * Assuming it is still backed by page structs, set the upper limit
	 * for the huge DMA window as MAX_PHYSMEM_BITS.
	 */
	if (of_find_node_by_type(NULL, "ibm,pmemory"))
		return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ?
			(phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS);

	for_each_node_by_type(memory, "memory") {
		unsigned long start, size;
		int n_mem_addr_cells, n_mem_size_cells, len;
		const __be32 *memcell_buf;

		memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		n_mem_addr_cells = of_n_addr_cells(memory);
		n_mem_size_cells = of_n_size_cells(memory);

		start = of_read_number(memcell_buf, n_mem_addr_cells);
		memcell_buf += n_mem_addr_cells;
		size = of_read_number(memcell_buf, n_mem_size_cells);
		memcell_buf += n_mem_size_cells;

		max_addr = max_t(phys_addr_t, max_addr, start + size);
	}

	return max_addr;
}
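
/*
 * Example (illustrative): with boot-time RAM ending at 2GB but a hotplug
 * region declared up to 8GB, the maximum of memory_hotplug_max() and the
 * highest "memory" node end address sizes the DDW window for memory that
 * may come online later, not just what is present at boot.
 */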

/*
 * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
 * ibm,ddw-extensions, which carries the rtas token for
 * ibm,reset-pe-dma-windows.
 * That rtas-call can be used to restore the default DMA window for the device.
 */
static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
{
	int ret;
	u32 cfg_addr, reset_dma_win;
	u64 buid;
	struct device_node *dn;
	struct pci_dn *pdn;

	ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
	if (ret)
		return;

	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);

	ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
			BUID_LO(buid));
	if (ret)
		dev_info(&dev->dev,
			 "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
			 reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
			 ret);
}

/*
 * If the PE supports dynamic dma windows, and there is space for a table
 * that can map all pages in a linear offset, then setup such a table,
 * and record the dma-offset in the struct device.
 *
 * dev: the pci device we are checking
 * pdn: the parent pe node with the ibm,dma_window property
 * Future: also check if we can remap the base window for our base page size
 *
 * returns the dma offset for use by the direct mapped DMA code.
 */
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
	int len = 0, ret;
	int max_ram_len = order_base_2(ddw_memory_hotplug_max());
	struct ddw_query_response query;
	struct ddw_create_response create;
	int page_shift;
	u64 dma_addr;
	struct device_node *dn;
	u32 ddw_avail[DDW_APPLICABLE_SIZE];
	struct direct_window *window;
	struct property *win64;
	struct dynamic_dma_window_prop *ddwprop;
	struct failed_ddw_pdn *fpdn;
	bool default_win_removed = false;
	bool pmem_present;

	dn = of_find_node_by_type(NULL, "ibm,pmemory");
	pmem_present = dn != NULL;
	of_node_put(dn);

	mutex_lock(&direct_window_init_mutex);

	dma_addr = find_existing_ddw(pdn, &len);
	if (dma_addr != 0)
		goto out_unlock;

	/*
	 * If we already went through this for a previous function of
	 * the same device and failed, we don't want to muck with the
	 * DMA window again, as it will race with in-flight operations
	 * and can lead to EEHs. The above mutex protects access to the
	 * list.
	 */
	list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
		if (fpdn->pdn == pdn)
			goto out_unlock;
	}

	/*
	 * the ibm,ddw-applicable property holds the tokens for:
	 * ibm,query-pe-dma-window
	 * ibm,create-pe-dma-window
	 * ibm,remove-pe-dma-window
	 * for the given node in that order.
	 * the property is actually in the parent, not the PE
	 */
	ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
	if (ret)
		goto out_failed;

	/*
	 * Query if there is a second window of size to map the
	 * whole partition. Query returns number of windows, largest
	 * block assigned to PE (partition endpoint), and two bitmasks
	 * of page sizes: supported and supported for migrate-dma.
	 */
	dn = pci_device_to_OF_node(dev);
	ret = query_ddw(dev, ddw_avail, &query, pdn);
	if (ret != 0)
		goto out_failed;

	/*
	 * If there is no window available, remove the default DMA window,
	 * if it's present. This will make all the resources available to the
	 * new DDW window.
	 * If anything fails after this, we need to restore it, so also check
	 * for extensions presence.
	 */
	if (query.windows_available == 0) {
		struct property *default_win;
		int reset_win_ext;

		default_win = of_find_property(pdn, "ibm,dma-window", NULL);
		if (!default_win)
			goto out_failed;

		reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
		if (reset_win_ext)
			goto out_failed;

		remove_dma_window(pdn, ddw_avail, default_win);
		default_win_removed = true;

		/* Query again, to check if the window is available */
		ret = query_ddw(dev, ddw_avail, &query, pdn);
		if (ret != 0)
			goto out_failed;

		if (query.windows_available == 0) {
			/* no windows are available for this device. */
			dev_dbg(&dev->dev, "no free dynamic windows");
			goto out_failed;
		}
	}
	if (query.page_size & 4) {
		page_shift = 24; /* 16MB */
	} else if (query.page_size & 2) {
		page_shift = 16; /* 64kB */
	} else if (query.page_size & 1) {
		page_shift = 12; /* 4kB */
	} else {
		dev_dbg(&dev->dev, "no supported direct page size in mask %x",
			query.page_size);
		goto out_failed;
	}
	/* verify the window * number of ptes will map the partition */
	/* check largest block * page size > max memory hotplug addr */
	/*
	 * The "ibm,pmemory" can appear anywhere in the address space.
	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
	 * for the upper limit and fallback to max RAM otherwise but this
	 * disables device::dma_ops_bypass.
	 */
	len = max_ram_len;
	if (pmem_present) {
		if (query.largest_available_block >=
		    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
			len = MAX_PHYSMEM_BITS - page_shift;
		else
			dev_info(&dev->dev, "Skipping ibm,pmemory");
	}

	if (query.largest_available_block < (1ULL << (len - page_shift))) {
		dev_dbg(&dev->dev,
			"can't map partition max 0x%llx with %llu %llu-sized pages\n",
			1ULL << len,
			query.largest_available_block,
			1ULL << page_shift);
		goto out_failed;
	}
	win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
	if (!win64) {
		dev_info(&dev->dev,
			 "couldn't allocate property for 64bit dma window\n");
		goto out_failed;
	}
	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
	win64->length = sizeof(*ddwprop);
	if (!win64->name || !win64->value) {
		dev_info(&dev->dev,
			 "couldn't allocate property name and value\n");
		goto out_free_prop;
	}

	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
	if (ret != 0)
		goto out_free_prop;

	ddwprop->liobn = cpu_to_be32(create.liobn);
	ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
			create.addr_lo);
	ddwprop->tce_shift = cpu_to_be32(page_shift);
	ddwprop->window_shift = cpu_to_be32(len);

	dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
		create.liobn, dn);

	window = kzalloc(sizeof(*window), GFP_KERNEL);
	if (!window)
		goto out_clear_window;

	ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
			win64->value, tce_setrange_multi_pSeriesLP_walk);
	if (ret) {
		dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
			 dn, ret);
		goto out_free_window;
	}

	ret = of_add_property(pdn, win64);
	if (ret) {
		dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
			pdn, ret);
		goto out_free_window;
	}

	window->device = pdn;
	window->prop = ddwprop;
	spin_lock(&direct_window_list_lock);
	list_add(&window->list, &direct_window_list);
	spin_unlock(&direct_window_list_lock);

	dma_addr = be64_to_cpu(ddwprop->dma_base);
	goto out_unlock;

out_free_window:
	kfree(window);

out_clear_window:
	remove_ddw(pdn, true);

out_free_prop:
	kfree(win64->name);
	kfree(win64->value);
	kfree(win64);

out_failed:
	if (default_win_removed)
		reset_dma_window(dev, pdn);

	fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
	if (!fpdn)
		goto out_unlock;
	fpdn->pdn = pdn;
	list_add(&fpdn->list, &failed_ddw_pdn_list);

out_unlock:
	mutex_unlock(&direct_window_init_mutex);

	/*
	 * If we have persistent memory and the window size is only as big
	 * as RAM, then we failed to create a window to cover persistent
	 * memory and need to set the DMA limit.
	 */
	if (pmem_present && dma_addr && (len == max_ram_len))
		dev->dev.bus_dma_limit = dma_addr + (1ULL << len);

	return dma_addr;
}
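
/*
 * The error labels in enable_ddw() unwind in reverse order of setup:
 * out_free_window drops the tracking struct, out_clear_window removes the
 * just-created window again via remove_ddw(), out_free_prop releases the
 * property name/value, and out_failed restores the default window (when it
 * was removed above) and blacklists the PE node so sibling functions don't
 * retry and race with in-flight DMA.
 */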

static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
{
	struct device_node *pdn, *dn;
	struct iommu_table *tbl;
	const __be32 *dma_window = NULL;
	struct pci_dn *pci;

	pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));

	/* dev setup for LPAR is a little tricky, since the device tree might
	 * contain the dma-window properties per-device and not necessarily
	 * for the bus. So we need to search upwards in the tree until we
	 * either hit a dma-window property, OR find a parent with a table
	 * already allocated.
	 */
	dn = pci_device_to_OF_node(dev);
	pr_debug("  node is %pOF\n", dn);

	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
	     pdn = pdn->parent) {
		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
		if (dma_window)
			break;
	}

	if (!pdn || !PCI_DN(pdn)) {
		printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
		       "no DMA window found for pci dev=%s dn=%pOF\n",
		       pci_name(dev), dn);
		return;
	}
	pr_debug("  parent is %pOF\n", pdn);

	pci = PCI_DN(pdn);
	if (!pci->table_group) {
		pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
		tbl = pci->table_group->tables[0];
		iommu_table_setparms_lpar(pci->phb, pdn, tbl,
				pci->table_group, dma_window);
		tbl->it_ops = &iommu_table_lpar_multi_ops;
		iommu_init_table(tbl, pci->phb->node, 0, 0);
		iommu_register_group(pci->table_group,
				pci_domain_nr(pci->phb->bus), 0);
		pr_debug("  created table: %p\n", pci->table_group);
	} else {
		pr_debug("  found DMA window, table: %p\n", pci->table_group);
	}

	set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
	iommu_add_device(pci->table_group, &dev->dev);
}

static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
	struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
	const __be32 *dma_window = NULL;

	/* only attempt to use a new window if 64-bit DMA is requested */
	if (dma_mask < DMA_BIT_MASK(64))
		return false;

	dev_dbg(&pdev->dev, "node is %pOF\n", dn);

	/*
	 * the device tree might contain the dma-window properties
	 * per-device and not necessarily for the bus. So we need to
	 * search upwards in the tree until we either hit a dma-window
	 * property, OR find a parent with a table already allocated.
	 */
	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
			pdn = pdn->parent) {
		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
		if (dma_window)
			break;
	}

	if (pdn && PCI_DN(pdn)) {
		pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
		if (pdev->dev.archdata.dma_offset)
			return true;
	}

	return false;
}

static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
		void *data)
{
	struct direct_window *window;
	struct memory_notify *arg = data;
	int ret = 0;

	switch (action) {
	case MEM_GOING_ONLINE:
		spin_lock(&direct_window_list_lock);
		list_for_each_entry(window, &direct_window_list, list) {
			ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
					arg->nr_pages, window->prop);
			/* XXX log error */
		}
		spin_unlock(&direct_window_list_lock);
		break;
	case MEM_CANCEL_ONLINE:
	case MEM_OFFLINE:
		spin_lock(&direct_window_list_lock);
		list_for_each_entry(window, &direct_window_list, list) {
			ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
					arg->nr_pages, window->prop);
			/* XXX log error */
		}
		spin_unlock(&direct_window_list_lock);
		break;
	default:
		break;
	}
	if (ret && action != MEM_CANCEL_ONLINE)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static struct notifier_block iommu_mem_nb = {
	.notifier_call = iommu_mem_notifier,
};

static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
	int err = NOTIFY_OK;
	struct of_reconfig_data *rd = data;
	struct device_node *np = rd->dn;
	struct pci_dn *pci = PCI_DN(np);
	struct direct_window *window;

	switch (action) {
	case OF_RECONFIG_DETACH_NODE:
		/*
		 * Removing the property will invoke the reconfig
		 * notifier again, which causes dead-lock on the
		 * read-write semaphore of the notifier chain. So
		 * we have to remove the property when releasing
		 * the device node.
		 */
		remove_ddw(np, false);
		if (pci && pci->table_group)
			iommu_pseries_free_group(pci->table_group,
					np->full_name);

		spin_lock(&direct_window_list_lock);
		list_for_each_entry(window, &direct_window_list, list) {
			if (window->device == np) {
				list_del(&window->list);
				kfree(window);
				break;
			}
		}
		spin_unlock(&direct_window_list_lock);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block iommu_reconfig_nb = {
	.notifier_call = iommu_reconfig_notifier,
};

/* These are called very early. */
void iommu_init_early_pSeries(void)
{
	if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
		return;

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
		if (!disable_ddw)
			pseries_pci_controller_ops.iommu_bypass_supported =
				iommu_bypass_supported_pSeriesLP;
	} else {
		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
	}

	of_reconfig_notifier_register(&iommu_reconfig_nb);
	register_memory_notifier(&iommu_mem_nb);

	set_pci_dma_ops(&dma_iommu_ops);
}

static int __init disable_multitce(char *str)
{
	if (strcmp(str, "off") == 0 &&
	    firmware_has_feature(FW_FEATURE_LPAR) &&
	    (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) ||
	     firmware_has_feature(FW_FEATURE_STUFF_TCE))) {
		printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
		powerpc_firmware_features &=
			~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
	}
	return 1;
}

__setup("multitce=", disable_multitce);
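
/*
 * Booting with "multitce=off" clears FW_FEATURE_PUT_TCE_IND and
 * FW_FEATURE_STUFF_TCE, forcing the one-TCE-per-hcall paths
 * (tce_build_pSeriesLP()/tce_free_pSeriesLP()) everywhere.
 */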

static int tce_iommu_bus_notifier(struct notifier_block *nb,
				  unsigned long action, void *data)
{
	struct device *dev = data;

	switch (action) {
	case BUS_NOTIFY_DEL_DEVICE:
		iommu_del_device(dev);
		return 0;
	default:
		return 0;
	}
}

static struct notifier_block tce_iommu_bus_nb = {
	.notifier_call = tce_iommu_bus_notifier,
};

static int __init tce_iommu_bus_notifier_init(void)
{
	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
	return 0;
}
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);