/*
 * IOMMU implementation for Broadband Processor Architecture
 * We just establish a linear mapping at boot by setting all the
 * IOPT cache entries in the CPU.
 * The mapping functions should be identical to pci_direct_iommu,
 * except for the handling of the high order bit that is required
 * by the Spider bridge. These should be split into a separate
 * file at the point where we get a different bridge chip.
 *
 * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH,
 *                    Arnd Bergmann <arndb@de.ibm.com>
 *
 * Based on linear mapping
 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>

#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/pmac_feature.h>
#include <asm/abs_addr.h>
#include <asm/system.h>

#include "bpa_iommu.h"

static inline unsigned long
get_iopt_entry(unsigned long real_address, unsigned long ioid,
	       unsigned long prot)
{
	return (prot & IOPT_PROT_MASK)
	     | (IOPT_COHERENT)
	     | (IOPT_ORDER_VC)
	     | (real_address & IOPT_RPN_MASK)
	     | (ioid & IOPT_IOID_MASK);
}

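/*
 * An iopt entry combines everything the IOC needs for one translation:
 * access protection, coherence and ordering attributes, the real page
 * number of the backing storage, and the ioid identifying the device
 * that may use the mapping. The masks come from bpa_iommu.h.
 */
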
typedef struct {
	unsigned long val;
} ioste;

static inline ioste
mk_ioste(unsigned long val)
{
	ioste ioste = { .val = val, };
	return ioste;
}

static inline ioste
get_iost_entry(unsigned long iopt_base, unsigned long io_address,
	       unsigned page_size)
{
	unsigned long ps;
	unsigned long iostep;
	unsigned long nnpt;
	unsigned long shift;

	switch (page_size) {
	case 0x1000000:
		ps = 7;    /* page size decodes as 1 << (10 + 2*ps), see get_ioptep() */
		nnpt = 0;  /* one page per segment */
		shift = 5; /* segment has 16 iopt entries */
		break;

	case 0x100000:
		ps = 5;
		nnpt = 0;  /* one page per segment */
		shift = 1; /* segment has 256 iopt entries */
		break;

	case 0x10000:
		ps = 3;
		nnpt = 0x07; /* 8 pages per io page table */
		shift = 0;   /* all entries are used */
		break;

	case 0x1000:
		ps = 1;
		nnpt = 0x7f; /* 128 pages per io page table */
		shift = 0;   /* all entries are used */
		break;

	default: /* not a known compile time constant */
		{
			/* BUILD_BUG_ON() is not usable here, force a link error */
			extern void __get_iost_entry_bad_page_size(void);
			__get_iost_entry_bad_page_size();
		}
		break;
	}

	iostep = iopt_base +
			/* need 8 bytes per iopte */
			(((io_address / page_size * 8)
			/* align io page tables on 4k page boundaries */
				<< shift)
			/* nnpt+1 pages go into each iopt */
				& ~(nnpt << 12));

	nnpt++;	/* this seems to work, but the documentation is not clear
		   about whether we put nnpt or nnpt-1 into the ioste bits.
		   In theory, this can't work for 4k pages. */
	return mk_ioste(IOST_VALID_MASK
			| (iostep & IOST_PT_BASE_MASK)
			| ((nnpt << 5) & IOST_NNPT_MASK)
			| (ps & IOST_PS_MASK));
}

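/*
 * Worked example: with the 16MB io_page_size used by bpa_map_iommu()
 * below, a 256MB I/O segment covers 16 io pages. At 8 bytes per iopte
 * that is 128 bytes of page table per segment, and shift = 5 places
 * successive segments' tables 128 << 5 = 4096 bytes apart, i.e. on the
 * 4k page boundaries the comment above asks for.
 */
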
/* compute the address of an io pte */
static inline unsigned long
get_ioptep(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopt_base;
	unsigned long page_size;
	unsigned long page_number;
	unsigned long iopt_offset;

	iopt_base = iost_entry.val & IOST_PT_BASE_MASK;
	page_size = iost_entry.val & IOST_PS_MASK;

	/* decode page size to compute page number */
	page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size);
	/* page number is an offset into the io page table */
	iopt_offset = (page_number << 3) & 0x7fff8ul;
	return iopt_base + iopt_offset;
}

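/*
 * The 0x0fffffff mask limits the lookup to the offset within one 256MB
 * I/O segment, and the << 3 converts the page number into a byte
 * offset, since each iopte is 8 bytes wide.
 */
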
/* compute the tag field of the iopt cache entry */
static inline unsigned long
get_ioc_tag(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopte = get_ioptep(iost_entry, io_address);

	return IOPT_VALID_MASK
	     | ((iopte & 0x00000000000000ff8ul) >> 3)
	     | ((iopte & 0x0000003fffffc0000ul) >> 9);
}

/* compute the hashed 6 bit index for the 4-way associative pte cache */
static inline unsigned long
get_ioc_hash(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopte = get_ioptep(iost_entry, io_address);

	return ((iopte & 0x000000000000001f8ul) >> 3)
	     ^ ((iopte & 0x00000000000020000ul) >> 17)
	     ^ ((iopte & 0x00000000000010000ul) >> 15)
	     ^ ((iopte & 0x00000000000008000ul) >> 13)
	     ^ ((iopte & 0x00000000000004000ul) >> 11)
	     ^ ((iopte & 0x00000000000002000ul) >> 9)
	     ^ ((iopte & 0x00000000000001000ul) >> 7);
}

/* same as above, but pretend that we have a simpler 1-way associative
   pte cache with an 8 bit index */
static inline unsigned long
get_ioc_hash_1way(ioste iost_entry, unsigned long io_address)
{
	unsigned long iopte = get_ioptep(iost_entry, io_address);

	return ((iopte & 0x000000000000001f8ul) >> 3)
	     ^ ((iopte & 0x00000000000020000ul) >> 17)
	     ^ ((iopte & 0x00000000000010000ul) >> 15)
	     ^ ((iopte & 0x00000000000008000ul) >> 13)
	     ^ ((iopte & 0x00000000000004000ul) >> 11)
	     ^ ((iopte & 0x00000000000002000ul) >> 9)
	     ^ ((iopte & 0x00000000000001000ul) >> 7)
	     ^ ((iopte & 0x0000000000000c000ul) >> 8);
}

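/*
 * The extra term folds iopte bits 14 and 15 into index bits 6 and 7,
 * widening the 6 bit hash of the 4-way cache above into the 8 bit
 * index of the pretend 1-way associative cache.
 */
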
static inline ioste
get_iost_cache(void __iomem *base, unsigned long index)
{
	unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);
	return mk_ioste(in_be64(&p[index]));
}

static inline void
set_iost_cache(void __iomem *base, unsigned long index, ioste ste)
{
	unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);
	pr_debug("ioste %02lx was %016lx, store %016lx", index,
			get_iost_cache(base, index).val, ste.val);
	out_be64(&p[index], ste.val);
	pr_debug(" now %016lx\n", get_iost_cache(base, index).val);
}

static inline unsigned long
get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag)
{
	unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR);
	unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG);

	*tag = tags[index];
	rmb();
	return *p;
}

static inline void
set_iopt_cache(void __iomem *base, unsigned long index,
		unsigned long tag, unsigned long val)
{
	unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR;
	unsigned long __iomem *p = base + IOC_PT_CACHE_REG;
	unsigned long oldtag;

	pr_debug("iopt %02lx was v%016lx/t%016lx, store v%016lx/t%016lx\n",
		index, get_iopt_cache(base, index, &oldtag), oldtag, val, tag);

	out_be64(p, val);
	out_be64(&tags[index], tag);
}

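/*
 * The entry value is stored through the cache register before the tag
 * is written into the directory; presumably the tag store is what
 * validates the entry, so this ordering should be preserved.
 */
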
static inline void
set_iost_origin(void __iomem *base)
{
	unsigned long __iomem *p = base + IOC_ST_ORIGIN;
	unsigned long origin = IOSTO_ENABLE | IOSTO_SW;

	pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin);
	out_be64(p, origin);
}

static inline void
set_iocmd_config(void __iomem *base)
{
	unsigned long __iomem *p = base + 0xc00;
	unsigned long conf;

	conf = in_be64(p);
	pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE);
	out_be64(p, conf | IOCMD_CONF_TE);
}

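/*
 * This read-modify-write only sets the translate enable bit
 * (IOCMD_CONF_TE) in the IOC configuration register; the raw 0xc00
 * offset is another candidate for the device tree FIXME below.
 */
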
/* FIXME: get these from the device tree */
#define ioc_base	0x20000511000ull
#define ioc_mmio_base	0x20000510000ull
#define ioid		0x48a
#define iopt_phys_offset (- 0x20000000) /* We have a 512MB offset from the SB */
#define io_page_size	0x1000000

static unsigned long map_iopt_entry(unsigned long address)
{
	switch (address >> 20) {
	case 0x600:
		address = 0x24020000000ull; /* spider i/o */
		break;
	default:
		address += iopt_phys_offset;
	}

	return get_iopt_entry(address, ioid, IOPT_PROT_RW);
}

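/*
 * DMA addresses thus map linearly to RAM at the 512MB offset from the
 * south bridge, except for the single megabyte that is redirected to
 * the spider i/o region above.
 */
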
static void iommu_bus_setup_null(struct pci_bus *b) { }
static void iommu_dev_setup_null(struct pci_dev *d) { }

/* initialize the iommu to support a simple linear mapping
 * for each DMA window used by any device. For now, we
 * happen to know that there is only one DMA window in use,
 * starting at iopt_phys_offset. */
static void bpa_map_iommu(void)
{
	unsigned long address;
	void __iomem *base;
	ioste ioste;
	unsigned long index;

	base = __ioremap(ioc_base, 0x1000, _PAGE_NO_CACHE);
	pr_debug("%lx mapped to %p\n", ioc_base, base);
	set_iocmd_config(base);
	iounmap(base);

	base = __ioremap(ioc_mmio_base, 0x1000, _PAGE_NO_CACHE);
	pr_debug("%lx mapped to %p\n", ioc_mmio_base, base);

	set_iost_origin(base);

	for (address = 0; address < 0x100000000ul; address += io_page_size) {
		ioste = get_iost_entry(0x10000000000ul, address, io_page_size);
		if ((address & 0xfffffff) == 0) /* segment start */
			set_iost_cache(base, address >> 28, ioste);
		index = get_ioc_hash_1way(ioste, address);
		pr_debug("addr %08lx, index %02lx, ioste %016lx\n",
						address, index, ioste.val);
		set_iopt_cache(base,
			get_ioc_hash_1way(ioste, address),
			get_ioc_tag(ioste, address),
			map_iopt_entry(address));
	}

	iounmap(base);
}

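/*
 * Because every IOST and IOPT cache entry covering the first 4GB of
 * bus addresses is primed here at boot, the IOC should never take a
 * cache miss that would require page tables in memory; that is what
 * lets the dma mapping callbacks below stay trivial.
 */
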
static void *bpa_alloc_coherent(struct device *hwdev, size_t size,
			   dma_addr_t *dma_handle, unsigned int __nocast flag)
{
	void *ret;

	ret = (void *)__get_free_pages(flag, get_order(size));
	if (ret != NULL) {
		memset(ret, 0, size);
		*dma_handle = virt_to_abs(ret) | BPA_DMA_VALID;
	}
	return ret;
}

static void bpa_free_coherent(struct device *hwdev, size_t size,
				 void *vaddr, dma_addr_t dma_handle)
{
	free_pages((unsigned long)vaddr, get_order(size));
}

static dma_addr_t bpa_map_single(struct device *hwdev, void *ptr,
		size_t size, enum dma_data_direction direction)
{
	return virt_to_abs(ptr) | BPA_DMA_VALID;
}

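/*
 * With the linear mapping already in place, mapping a buffer reduces
 * to converting its kernel address to an absolute address and setting
 * the BPA_DMA_VALID bit that the Spider bridge requires in the high
 * order bits; no iommu state changes, so unmapping is a no-op.
 */
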
static void bpa_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
		size_t size, enum dma_data_direction direction)
{
}

static int bpa_map_sg(struct device *hwdev, struct scatterlist *sg,
		int nents, enum dma_data_direction direction)
{
	int i;

	for (i = 0; i < nents; i++, sg++) {
		sg->dma_address = (page_to_phys(sg->page) + sg->offset)
					| BPA_DMA_VALID;
		sg->dma_length = sg->length;
	}

	return nents;
}

static void bpa_unmap_sg(struct device *hwdev, struct scatterlist *sg,
		int nents, enum dma_data_direction direction)
{
}

static int bpa_dma_supported(struct device *dev, u64 mask)
{
	return mask < 0x100000000ull;
}

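/*
 * Only DMA masks below 4GB are accepted, matching the single 32 bit
 * DMA window that bpa_map_iommu() establishes above.
 */
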
void bpa_init_iommu(void)
{
	bpa_map_iommu();

	/* Direct I/O, IOMMU off */
	ppc_md.iommu_dev_setup = iommu_dev_setup_null;
	ppc_md.iommu_bus_setup = iommu_bus_setup_null;

	pci_dma_ops.alloc_coherent = bpa_alloc_coherent;
	pci_dma_ops.free_coherent = bpa_free_coherent;
	pci_dma_ops.map_single = bpa_map_single;
	pci_dma_ops.unmap_single = bpa_unmap_single;
	pci_dma_ops.map_sg = bpa_map_sg;
	pci_dma_ops.unmap_sg = bpa_unmap_sg;
	pci_dma_ops.dma_supported = bpa_dma_supported;
}