/*
 * IOMMU implementation for Cell Broadband Processor Architecture
 * We just establish a linear mapping at boot by setting all the
 * IOPT cache entries in the CPU.
 * The mapping functions should be identical to pci_direct_iommu,
 * except for the handling of the high order bit that is required
 * by the Spider bridge. These should be split into a separate
 * file at the point where we get a different bridge chip.
 *
 * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH,
 *                    Arnd Bergmann <arndb@de.ibm.com>
 *
 * Based on linear mapping
 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
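/*
 * In other words: with the linear mapping established here, the bus
 * address handed to a device is simply the absolute (physical) address
 * of the buffer with CELL_DMA_VALID ORed in -- the "high order bit"
 * mentioned above.  See cell_map_single() and cell_alloc_coherent()
 * below, which compute
 *
 *      dma_addr = virt_to_abs(ptr) | CELL_DMA_VALID;
 */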
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/compiler.h>

#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/pmac_feature.h>
#include <asm/abs_addr.h>
#include <asm/system.h>
#include <asm/ppc-pci.h>

#include "iommu.h"      /* platform IOPT_*, IOST_*, IOC_* and CELL_DMA_VALID definitions */
/* build an io page table entry from the protection bits, the real
 * address and the io device id */
static inline unsigned long
get_iopt_entry(unsigned long real_address, unsigned long ioid,
               unsigned long prot)
{
        return (prot & IOPT_PROT_MASK)
             | (real_address & IOPT_RPN_MASK)
             | (ioid & IOPT_IOID_MASK);
}
/* an io segment table entry is just a 64-bit value */
typedef struct {
        unsigned long val;
} ioste;

static inline ioste
mk_ioste(unsigned long val)
{
        ioste ioste = { .val = val, };
        return ioste;
}
static inline ioste
get_iost_entry(unsigned long iopt_base, unsigned long io_address,
               unsigned page_size)
{
        unsigned long ps;
        unsigned long iostep;
        unsigned long nnpt;
        unsigned long shift;

        switch (page_size) {
        case 0x1000000: /* 16M pages */
                ps = IOST_PS_16M;
                nnpt = 0;  /* one page per segment */
                shift = 5; /* segment has 16 iopt entries */
                break;

        case 0x100000: /* 1M pages */
                ps = IOST_PS_1M;
                nnpt = 0;  /* one page per segment */
                shift = 1; /* segment has 256 iopt entries */
                break;

        case 0x10000: /* 64k pages */
                ps = IOST_PS_64K;
                nnpt = 0x07; /* 8 pages per io page table */
                shift = 0;   /* all entries are used */
                break;

        case 0x1000: /* 4k pages */
                ps = IOST_PS_4K;
                nnpt = 0x7f; /* 128 pages per io page table */
                shift = 0;   /* all entries are used */
                break;

        default: /* not a known compile time constant */
                {
                        /* BUILD_BUG_ON() is not usable here */
                        extern void __get_iost_entry_bad_page_size(void);
                        __get_iost_entry_bad_page_size();
                }
                break;
        }

        iostep = iopt_base +
                        /* need 8 bytes per iopte */
                        (((io_address / page_size * 8)
                        /* align io page tables on 4k page boundaries */
                                 << shift)
                        /* nnpt+1 pages go into each iopt */
                                 & ~(nnpt << 12));

        nnpt++; /* this seems to work, but the documentation is not clear
                   about whether we put nnpt or nnpt-1 into the ioste bits.
                   In theory, this can't work for 4k pages. */
        return mk_ioste(IOST_VALID_MASK
                        | (iostep & IOST_PT_BASE_MASK)
                        | ((nnpt << 5) & IOST_NNPT_MASK)
                        | (ps & IOST_PS_MASK));
}
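/*
 * To summarize the encoding above: an io segment table entry packs a
 * valid bit, the real-address base of the segment's io page table, the
 * number of 4k pages making up that table (nnpt) and a page-size code,
 * each masked into its own field of the 64-bit value.
 */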
/* compute the address of an io pte */
static inline unsigned long
get_ioptep(ioste iost_entry, unsigned long io_address)
{
        unsigned long iopt_base;
        unsigned long page_size;
        unsigned long page_number;
        unsigned long iopt_offset;

        iopt_base = iost_entry.val & IOST_PT_BASE_MASK;
        page_size = iost_entry.val & IOST_PS_MASK;

        /* decode page size to compute page number */
        page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size);
        /* page number is an offset into the io page table */
        iopt_offset = (page_number << 3) & 0x7fff8ul;
        return iopt_base + iopt_offset;
}
/* compute the tag field of the iopt cache entry */
static inline unsigned long
get_ioc_tag(ioste iost_entry, unsigned long io_address)
{
        unsigned long iopte = get_ioptep(iost_entry, io_address);

        return IOPT_VALID_MASK
             | ((iopte & 0x00000000000000ff8ul) >> 3)
             | ((iopte & 0x0000003fffffc0000ul) >> 9);
}
/* compute the hashed 6 bit index for the 4-way associative pte cache */
static inline unsigned long
get_ioc_hash(ioste iost_entry, unsigned long io_address)
{
        unsigned long iopte = get_ioptep(iost_entry, io_address);

        return ((iopte & 0x000000000000001f8ul) >> 3)
             ^ ((iopte & 0x00000000000020000ul) >> 17)
             ^ ((iopte & 0x00000000000010000ul) >> 15)
             ^ ((iopte & 0x00000000000008000ul) >> 13)
             ^ ((iopte & 0x00000000000004000ul) >> 11)
             ^ ((iopte & 0x00000000000002000ul) >> 9)
             ^ ((iopte & 0x00000000000001000ul) >> 7);
}
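/*
 * Note on the hash above: the low six bits of the index come from bits
 * 3-8 of the iopte address (the natural index), and bits 12-17 of that
 * address are XOR-folded on top of them, presumably so that ptes from
 * different io page tables do not all land on the same index.
 */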
/* same as above, but pretend that we have a simpler 1-way associative
   pte cache with an 8 bit index */
static inline unsigned long
get_ioc_hash_1way(ioste iost_entry, unsigned long io_address)
{
        unsigned long iopte = get_ioptep(iost_entry, io_address);

        return ((iopte & 0x000000000000001f8ul) >> 3)
             ^ ((iopte & 0x00000000000020000ul) >> 17)
             ^ ((iopte & 0x00000000000010000ul) >> 15)
             ^ ((iopte & 0x00000000000008000ul) >> 13)
             ^ ((iopte & 0x00000000000004000ul) >> 11)
             ^ ((iopte & 0x00000000000002000ul) >> 9)
             ^ ((iopte & 0x00000000000001000ul) >> 7)
             ^ ((iopte & 0x0000000000000c000ul) >> 8);
}
static inline ioste
get_iost_cache(void __iomem *base, unsigned long index)
{
        unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);

        return mk_ioste(in_be64(&p[index]));
}
static inline void
set_iost_cache(void __iomem *base, unsigned long index, ioste ste)
{
        unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);

        pr_debug("ioste %02lx was %016lx, store %016lx", index,
                        get_iost_cache(base, index).val, ste.val);
        out_be64(&p[index], ste.val);
        pr_debug(" now %016lx\n", get_iost_cache(base, index).val);
}
static inline unsigned long
get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag)
{
        unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR);
        unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG);

        /* read the tag for this index, then the cached pte itself */
        *tag = tags[index];
        rmb();
        return *p;
}
static inline void
set_iopt_cache(void __iomem *base, unsigned long index,
               unsigned long tag, unsigned long val)
{
        unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR;
        unsigned long __iomem *p = base + IOC_PT_CACHE_REG;

        out_be64(p, val);
        out_be64(&tags[index], tag);
}
static inline void
set_iost_origin(void __iomem *base)
{
        unsigned long __iomem *p = base + IOC_ST_ORIGIN;
        unsigned long origin = IOSTO_ENABLE | IOSTO_SW;

        pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin);
        out_be64(p, origin);
}
static inline void
set_iocmd_config(void __iomem *base)
{
        unsigned long __iomem *p = base + 0xc00;
        unsigned long conf;

        conf = in_be64(p);
        pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE);
        out_be64(p, conf | IOCMD_CONF_TE);
}
static void enable_mapping(void __iomem *base, void __iomem *mmio_base)
{
        set_iocmd_config(base);
        set_iost_origin(mmio_base);
}

static void iommu_dev_setup_null(struct pci_dev *d) { }
static void iommu_bus_setup_null(struct pci_bus *b) { }
struct cell_iommu {
        unsigned long mmio_base;
        void __iomem *mapped_base;
        void __iomem *mapped_mmio_base;
};

static struct cell_iommu cell_iommus[NR_CPUS];
/* initialize the iommu to support a simple linear mapping
 * for each DMA window used by any device. For now, we
 * happen to know that there is only one DMA window in use,
 * starting at iopt_phys_offset. */
static void cell_do_map_iommu(struct cell_iommu *iommu,
                              unsigned int ioid,
                              unsigned long map_start,
                              unsigned long map_size)
{
        unsigned long io_address, real_address;
        void __iomem *ioc_base, *ioc_mmio_base;
        ioste ioste;
        unsigned long index;

        /* we pretend the io page table was at a very high address */
        const unsigned long fake_iopt = 0x10000000000ul;
        const unsigned long io_page_size = 0x1000000; /* use 16M pages */
        const unsigned long io_segment_size = 0x10000000; /* 256M */

        ioc_base = iommu->mapped_base;
        ioc_mmio_base = iommu->mapped_mmio_base;

        for (real_address = 0, io_address = map_start;
             io_address <= map_start + map_size;
             real_address += io_page_size, io_address += io_page_size) {
                ioste = get_iost_entry(fake_iopt, io_address, io_page_size);
                if ((real_address % io_segment_size) == 0) /* segment start */
                        set_iost_cache(ioc_mmio_base,
                                       io_address >> 28, ioste);
                index = get_ioc_hash_1way(ioste, io_address);
                pr_debug("addr %08lx, index %02lx, ioste %016lx\n",
                         io_address, index, ioste.val);
                set_iopt_cache(ioc_mmio_base,
                               get_ioc_hash_1way(ioste, io_address),
                               get_ioc_tag(ioste, io_address),
                               get_iopt_entry(real_address, ioid, IOPT_PROT_RW));
        }
}
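/*
 * For example, with the hardcoded window set up below (map_start and
 * map_size both 0x20000000, i.e. a 512MB window mapped with 16MB io
 * pages), the loop above installs one iopt cache entry per 16MB page
 * and one iost cache entry at the start of each 256MB segment.
 */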
static void iommu_devnode_setup(struct device_node *d)
{
        const unsigned int *ioid;
        unsigned long map_start, map_size, token;
        const unsigned long *dma_window;
        struct cell_iommu *iommu;

        ioid = get_property(d, "ioid", NULL);
        if (!ioid)
                pr_debug("No ioid entry found !\n");

        dma_window = get_property(d, "ibm,dma-window", NULL);
        if (!dma_window)
                pr_debug("No ibm,dma-window entry found !\n");

        map_start = dma_window[1];
        map_size = dma_window[2];
        token = dma_window[0] >> 32;

        iommu = &cell_iommus[token];

        cell_do_map_iommu(iommu, *ioid, map_start, map_size);
}
static void iommu_bus_setup(struct pci_bus *b)
{
        struct device_node *d = (struct device_node *)b->sysdata;

        iommu_devnode_setup(d);
}
static int cell_map_iommu_hardcoded(int num_nodes)
{
        struct cell_iommu *iommu = NULL;

        pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__);

        /* node 0 */
        iommu = &cell_iommus[0];
        iommu->mapped_base = ioremap(0x20000511000, 0x1000);
        iommu->mapped_mmio_base = ioremap(0x20000510000, 0x1000);

        enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base);

        cell_do_map_iommu(iommu, 0x048a,
                          0x20000000ul, 0x20000000ul);

        if (num_nodes < 2)
                return 0;

        /* node 1 */
        iommu = &cell_iommus[1];
        iommu->mapped_base = ioremap(0x30000511000, 0x1000);
        iommu->mapped_mmio_base = ioremap(0x30000510000, 0x1000);

        enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base);

        cell_do_map_iommu(iommu, 0x048a,
                          0x20000000ul, 0x20000000ul);

        return 0;
}
static int cell_map_iommu(void)
{
        unsigned int num_nodes = 0;
        const unsigned int *node_id;
        const unsigned long *base, *mmio_base;
        struct device_node *dn;
        struct cell_iommu *iommu = NULL;

        /* determine number of nodes (=iommus) */
        pr_debug("%s(%d): determining number of nodes...", __FUNCTION__, __LINE__);
        for (dn = of_find_node_by_type(NULL, "cpu");
             dn;
             dn = of_find_node_by_type(dn, "cpu")) {
                node_id = get_property(dn, "node-id", NULL);

                if (num_nodes < *node_id)
                        num_nodes = *node_id;
        }

        num_nodes++;
        pr_debug("%i found.\n", num_nodes);

        /* map the iommu registers for each node */
        pr_debug("%s(%d): Looping through nodes\n", __FUNCTION__, __LINE__);
        for (dn = of_find_node_by_type(NULL, "cpu");
             dn;
             dn = of_find_node_by_type(dn, "cpu")) {
                node_id = get_property(dn, "node-id", NULL);
                base = get_property(dn, "ioc-cache", NULL);
                mmio_base = get_property(dn, "ioc-translation", NULL);

                if (!base || !mmio_base || !node_id)
                        return cell_map_iommu_hardcoded(num_nodes);

                iommu = &cell_iommus[*node_id];
                iommu->mmio_base = *mmio_base;

                iommu->mapped_base = ioremap(*base, 0x1000);
                iommu->mapped_mmio_base = ioremap(*mmio_base, 0x1000);

                enable_mapping(iommu->mapped_base,
                               iommu->mapped_mmio_base);

                /* everything else will be done in iommu_bus_setup */
        }

        return 1;
}
static void *cell_alloc_coherent(struct device *hwdev, size_t size,
                                 dma_addr_t *dma_handle, gfp_t flag)
{
        void *ret;

        ret = (void *)__get_free_pages(flag, get_order(size));

        if (ret != NULL) {
                memset(ret, 0, size);
                *dma_handle = virt_to_abs(ret) | CELL_DMA_VALID;
        }

        return ret;
}
static void cell_free_coherent(struct device *hwdev, size_t size,
                               void *vaddr, dma_addr_t dma_handle)
{
        free_pages((unsigned long)vaddr, get_order(size));
}
static dma_addr_t cell_map_single(struct device *hwdev, void *ptr,
                                  size_t size, enum dma_data_direction direction)
{
        return virt_to_abs(ptr) | CELL_DMA_VALID;
}
static void cell_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
                              size_t size, enum dma_data_direction direction)
{
}
static int cell_map_sg(struct device *hwdev, struct scatterlist *sg,
                       int nents, enum dma_data_direction direction)
{
        int i;

        for (i = 0; i < nents; i++, sg++) {
                sg->dma_address = (page_to_phys(sg->page) + sg->offset)
                                        | CELL_DMA_VALID;
                sg->dma_length = sg->length;
        }

        return nents;
}
static void cell_unmap_sg(struct device *hwdev, struct scatterlist *sg,
                          int nents, enum dma_data_direction direction)
{
}
static int cell_dma_supported(struct device *dev, u64 mask)
{
        return mask < 0x100000000ull;
}
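/*
 * All of the ops below implement the plain linear mapping described in
 * the header comment: map_single, map_sg and alloc_coherent just OR
 * CELL_DMA_VALID into the absolute address, so the corresponding unmap
 * and free routines have no translation state to tear down.
 */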
static struct dma_mapping_ops cell_iommu_ops = {
        .alloc_coherent = cell_alloc_coherent,
        .free_coherent = cell_free_coherent,
        .map_single = cell_map_single,
        .unmap_single = cell_unmap_single,
        .map_sg = cell_map_sg,
        .unmap_sg = cell_unmap_sg,
        .dma_supported = cell_dma_supported,
};
void cell_init_iommu(void)
{
        int setup_bus = 0;

        if (of_find_node_by_path("/mambo")) {
                pr_info("Not using iommu on systemsim\n");
        } else {
                if (!(of_chosen &&
                      get_property(of_chosen, "linux,iommu-off", NULL)))
                        setup_bus = cell_map_iommu();

                if (setup_bus) {
                        pr_debug("%s: IOMMU mapping activated\n", __FUNCTION__);
                        ppc_md.iommu_dev_setup = iommu_dev_setup_null;
                        ppc_md.iommu_bus_setup = iommu_bus_setup;
                } else {
                        pr_debug("%s: IOMMU mapping activated, "
                                 "no device action necessary\n", __FUNCTION__);
                        /* Direct I/O, IOMMU off */
                        ppc_md.iommu_dev_setup = iommu_dev_setup_null;
                        ppc_md.iommu_bus_setup = iommu_bus_setup_null;
                }
        }

        pci_dma_ops = cell_iommu_ops;
}