/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>
/* Generic DMA mapping functions: */

/*
 * Allocate what Linux calls "coherent" memory.  On TILEPro this is
 * uncached memory; on TILE-Gx it is hash-for-home memory.
 */
#ifdef __tilepro__
#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
#else
#define PAGE_HOME_DMA PAGE_HOME_HASH
#endif
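/*
 * For illustration only: a driver normally reaches the allocator below
 * through the generic dma_alloc_coherent()/dma_free_coherent() wrappers
 * rather than calling it directly.  The device and size names here are
 * hypothetical, not part of this file:
 *
 *	void *ring;
 *	dma_addr_t ring_dma;
 *
 *	ring = dma_alloc_coherent(&pdev->dev, MY_RING_BYTES,
 *				  &ring_dma, GFP_KERNEL);
 *	if (ring == NULL)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(&pdev->dev, MY_RING_BYTES, ring, ring_dma);
 */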
static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
				     dma_addr_t *dma_handle, gfp_t gfp,
				     struct dma_attrs *attrs)
{
	u64 dma_mask = (dev && dev->coherent_dma_mask) ?
		dev->coherent_dma_mask : DMA_BIT_MASK(32);
	int node = dev ? dev_to_node(dev) : 0;
	int order = get_order(size);
	struct page *pg;
	dma_addr_t addr;

	gfp |= __GFP_ZERO;

	/*
	 * If the mask specifies that the memory be in the first 4 GB, then
	 * we force the allocation to come from the DMA zone.  We also
	 * force the node to 0 since that's the only node where the DMA
	 * zone isn't empty.  If the mask size is smaller than 32 bits, we
	 * may still not be able to guarantee a suitable memory address, in
	 * which case we will return NULL.  But such devices are uncommon.
	 */
	if (dma_mask <= DMA_BIT_MASK(32)) {
		gfp |= GFP_DMA;
		node = 0;
	}

	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
	if (pg == NULL)
		return NULL;

	addr = page_to_phys(pg);
	if (addr + size > dma_mask) {
		__homecache_free_pages(pg, order);
		return NULL;
	}

	*dma_handle = addr;

	return page_address(pg);
}
/*
 * Free memory that was allocated with tile_dma_alloc_coherent.
 */
static void tile_dma_free_coherent(struct device *dev, size_t size,
				   void *vaddr, dma_addr_t dma_handle,
				   struct dma_attrs *attrs)
{
	homecache_free_pages((unsigned long)vaddr, get_order(size));
}
/*
 * The map routines "map" the specified address range for DMA
 * accesses.  The memory belongs to the device after this call is
 * issued, until it is unmapped with dma_unmap_single.
 *
 * We don't need to do any mapping, we just flush the address range
 * out of the cache and return a DMA address.
 *
 * The unmap routines do whatever is necessary before the processor
 * accesses the memory again, and must be called before the driver
 * touches the memory.  We can get away with a cache invalidate if we
 * can count on nothing having been touched.
 */
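/*
 * Sketch of the driver-side calling sequence for these routines, for
 * illustration only (the device, buffer, and length names are made up):
 *
 *	dma_addr_t bus = dma_map_single(&pdev->dev, buf, len,
 *					DMA_FROM_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, bus))
 *		return -ENOMEM;
 *	... hand "bus" to the hardware and wait for the DMA to finish ...
 *	dma_unmap_single(&pdev->dev, bus, len, DMA_FROM_DEVICE);
 *	... only now may the CPU read buf ...
 */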
/* Set up a single page for DMA access. */
static void __dma_prep_page(struct page *page, unsigned long offset,
			    size_t size, enum dma_data_direction direction)
{
	/*
	 * Flush the page from cache if necessary.
	 * On tilegx, data is delivered to hash-for-home L3; on tilepro,
	 * data is delivered direct to memory.
	 *
	 * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
	 * this to be a "flush" not a "finv" and keep some of the
	 * state in cache across the DMA operation, but it doesn't seem
	 * worth creating the necessary flush_buffer_xxx() infrastructure.
	 */
	int home = page_home(page);
	switch (home) {
	case PAGE_HOME_HASH:
#ifdef __tilegx__
		return;
#endif
		break;
	case PAGE_HOME_UNCACHED:
#ifdef __tilepro__
		return;
#endif
		break;
	case PAGE_HOME_IMMUTABLE:
		/* Should be going to the device only. */
		BUG_ON(direction == DMA_FROM_DEVICE ||
		       direction == DMA_BIDIRECTIONAL);
		return;
	case PAGE_HOME_INCOHERENT:
		/* Incoherent anyway, so no need to work hard here. */
		return;
	default:
		BUG_ON(home < 0 || home >= NR_CPUS);
		break;
	}
	homecache_finv_page(page);

#ifdef DEBUG_ALIGNMENT
	/* Warn if the region isn't cacheline aligned. */
	if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
		pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
			PFN_PHYS(page_to_pfn(page)) + offset, size);
#endif
}
/* Make the page ready to be read by the core. */
static void __dma_complete_page(struct page *page, unsigned long offset,
				size_t size, enum dma_data_direction direction)
{
#ifdef __tilegx__
	switch (page_home(page)) {
	case PAGE_HOME_HASH:
		/* I/O device delivered data the way the cpu wanted it. */
		break;
	case PAGE_HOME_INCOHERENT:
		/* Incoherent anyway, so no need to work hard here. */
		break;
	case PAGE_HOME_IMMUTABLE:
		/* Extra read-only copies are not a problem. */
		break;
	default:
		/* Flush the bogus hash-for-home I/O entries to memory. */
		homecache_finv_map_page(page, PAGE_HOME_HASH);
		break;
	}
#endif
}
static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
				enum dma_data_direction direction)
{
	struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
	unsigned long offset = dma_addr & (PAGE_SIZE - 1);
	size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

	while (size != 0) {
		__dma_prep_page(page, offset, bytes, direction);
		size -= bytes;
		++page;
		offset = 0;
		bytes = min((size_t)PAGE_SIZE, size);
	}
}
static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
				    enum dma_data_direction direction)
{
	struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
	unsigned long offset = dma_addr & (PAGE_SIZE - 1);
	size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

	while (size != 0) {
		__dma_complete_page(page, offset, bytes, direction);
		size -= bytes;
		++page;
		offset = 0;
		bytes = min((size_t)PAGE_SIZE, size);
	}
}
static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
			   int nents, enum dma_data_direction direction,
			   struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));

	WARN_ON(nents == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_prep_pa_range(sg->dma_address, sg->length, direction);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		sg->dma_length = sg->length;
#endif
	}

	return nents;
}
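/*
 * Illustrative only: a driver would drive the routine above through
 * dma_map_sg() and then walk the mapped list with the generic helpers.
 * The table, device, and queue_descriptor() names below are hypothetical:
 *
 *	int count = dma_map_sg(&pdev->dev, table->sgl, table->nents,
 *			       DMA_TO_DEVICE);
 *	struct scatterlist *sg;
 *	int i;
 *
 *	for_each_sg(table->sgl, sg, count, i)
 *		queue_descriptor(sg_dma_address(sg), sg_dma_len(sg));
 */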
static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
			      int nents, enum dma_data_direction direction,
			      struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_complete_pa_range(sg->dma_address, sg->length,
					direction);
	}
}
static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
				    unsigned long offset, size_t size,
				    enum dma_data_direction direction,
				    struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	BUG_ON(offset + size > PAGE_SIZE);
	__dma_prep_page(page, offset, size, direction);

	return page_to_pa(page) + offset;
}
static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
				size_t size, enum dma_data_direction direction,
				struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
			    dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_dma_sync_single_for_cpu(struct device *dev,
					 dma_addr_t dma_handle,
					 size_t size,
					 enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));

	__dma_complete_pa_range(dma_handle, size, direction);
}
static void tile_dma_sync_single_for_device(struct device *dev,
					    dma_addr_t dma_handle, size_t size,
					    enum dma_data_direction direction)
{
	__dma_prep_pa_range(dma_handle, size, direction);
}
static void tile_dma_sync_sg_for_cpu(struct device *dev,
				     struct scatterlist *sglist, int nelems,
				     enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_cpu(dev, sg->dma_address,
					sg_dma_len(sg), direction);
	}
}
static void tile_dma_sync_sg_for_device(struct device *dev,
					struct scatterlist *sglist, int nelems,
					enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_device(dev, sg->dma_address,
					   sg_dma_len(sg), direction);
	}
}
static int
tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return 0;
}
static int
tile_dma_supported(struct device *dev, u64 mask)
{
	return 1;
}
static struct dma_map_ops tile_default_dma_map_ops = {
	.alloc = tile_dma_alloc_coherent,
	.free = tile_dma_free_coherent,
	.map_page = tile_dma_map_page,
	.unmap_page = tile_dma_unmap_page,
	.map_sg = tile_dma_map_sg,
	.unmap_sg = tile_dma_unmap_sg,
	.sync_single_for_cpu = tile_dma_sync_single_for_cpu,
	.sync_single_for_device = tile_dma_sync_single_for_device,
	.sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
	.sync_sg_for_device = tile_dma_sync_sg_for_device,
	.mapping_error = tile_dma_mapping_error,
	.dma_supported = tile_dma_supported
};

struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
EXPORT_SYMBOL(tile_dma_map_ops);
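/*
 * Rough sketch of how the generic DMA API reaches this table, for
 * orientation only (simplified from the generic dma-mapping wrappers,
 * not code from this file):
 *
 *	struct dma_map_ops *ops = get_dma_ops(dev);
 *	dma_addr_t bus = ops->map_page(dev, virt_to_page(ptr),
 *				       offset_in_page(ptr), size, dir, NULL);
 *
 * so every dma_map_single()/dma_map_sg() call on a TILE device ends up in
 * the tile_dma_* or tile_pci_dma_* routines selected for that device.
 */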
/* Generic PCI DMA mapping functions */
static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
					 dma_addr_t *dma_handle, gfp_t gfp,
					 struct dma_attrs *attrs)
{
	int node = dev_to_node(dev);
	int order = get_order(size);
	struct page *pg;
	dma_addr_t addr;

	gfp |= __GFP_ZERO;

	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
	if (pg == NULL)
		return NULL;

	addr = page_to_phys(pg);

	*dma_handle = addr + get_dma_offset(dev);

	return page_address(pg);
}
/*
 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
 */
static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
				       void *vaddr, dma_addr_t dma_handle,
				       struct dma_attrs *attrs)
{
	homecache_free_pages((unsigned long)vaddr, get_order(size));
}
static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
			       int nents, enum dma_data_direction direction,
			       struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));

	WARN_ON(nents == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_prep_pa_range(sg->dma_address, sg->length, direction);

		sg->dma_address = sg->dma_address + get_dma_offset(dev);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		sg->dma_length = sg->length;
#endif
	}

	return nents;
}
static void tile_pci_dma_unmap_sg(struct device *dev,
				  struct scatterlist *sglist, int nents,
				  enum dma_data_direction direction,
				  struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_complete_pa_range(sg->dma_address, sg->length,
					direction);
	}
}
static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
					unsigned long offset, size_t size,
					enum dma_data_direction direction,
					struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	BUG_ON(offset + size > PAGE_SIZE);
	__dma_prep_page(page, offset, size, direction);

	return page_to_pa(page) + offset + get_dma_offset(dev);
}
static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
				    size_t size,
				    enum dma_data_direction direction,
				    struct dma_attrs *attrs)
{
	BUG_ON(!valid_dma_direction(direction));

	dma_address -= get_dma_offset(dev);

	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
			    dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
					     dma_addr_t dma_handle,
					     size_t size,
					     enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));

	dma_handle -= get_dma_offset(dev);

	__dma_complete_pa_range(dma_handle, size, direction);
}
static void tile_pci_dma_sync_single_for_device(struct device *dev,
						dma_addr_t dma_handle,
						size_t size,
						enum dma_data_direction
						direction)
{
	dma_handle -= get_dma_offset(dev);

	__dma_prep_pa_range(dma_handle, size, direction);
}
static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
					 struct scatterlist *sglist,
					 int nelems,
					 enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_cpu(dev, sg->dma_address,
					sg_dma_len(sg), direction);
	}
}
static void tile_pci_dma_sync_sg_for_device(struct device *dev,
					    struct scatterlist *sglist,
					    int nelems,
					    enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_device(dev, sg->dma_address,
					   sg_dma_len(sg), direction);
	}
}
static int
tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return 0;
}
static int
tile_pci_dma_supported(struct device *dev, u64 mask)
{
	return 1;
}
static struct dma_map_ops tile_pci_default_dma_map_ops = {
	.alloc = tile_pci_dma_alloc_coherent,
	.free = tile_pci_dma_free_coherent,
	.map_page = tile_pci_dma_map_page,
	.unmap_page = tile_pci_dma_unmap_page,
	.map_sg = tile_pci_dma_map_sg,
	.unmap_sg = tile_pci_dma_unmap_sg,
	.sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
	.sync_single_for_device = tile_pci_dma_sync_single_for_device,
	.sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
	.sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
	.mapping_error = tile_pci_dma_mapping_error,
	.dma_supported = tile_pci_dma_supported
};

struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
EXPORT_SYMBOL(gx_pci_dma_map_ops);
/* PCI DMA mapping functions for legacy PCI devices */
#ifdef CONFIG_SWIOTLB
static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
					 dma_addr_t *dma_handle, gfp_t gfp,
					 struct dma_attrs *attrs)
{
	gfp |= GFP_DMA;
	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
}

static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
				       void *vaddr, dma_addr_t dma_addr,
				       struct dma_attrs *attrs)
{
	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
}
static struct dma_map_ops pci_swiotlb_dma_ops = {
	.alloc = tile_swiotlb_alloc_coherent,
	.free = tile_swiotlb_free_coherent,
	.map_page = swiotlb_map_page,
	.unmap_page = swiotlb_unmap_page,
	.map_sg = swiotlb_map_sg_attrs,
	.unmap_sg = swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
	.sync_single_for_device = swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = swiotlb_sync_sg_for_device,
	.dma_supported = swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};
static struct dma_map_ops pci_hybrid_dma_ops = {
	.alloc = tile_swiotlb_alloc_coherent,
	.free = tile_swiotlb_free_coherent,
	.map_page = tile_pci_dma_map_page,
	.unmap_page = tile_pci_dma_unmap_page,
	.map_sg = tile_pci_dma_map_sg,
	.unmap_sg = tile_pci_dma_unmap_sg,
	.sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
	.sync_single_for_device = tile_pci_dma_sync_single_for_device,
	.sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
	.sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
	.mapping_error = tile_pci_dma_mapping_error,
	.dma_supported = tile_pci_dma_supported
};
struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
#else
struct dma_map_ops *gx_legacy_pci_dma_map_ops;
struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
#endif
EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);
int dma_set_mask(struct device *dev, u64 mask)
{
	struct dma_map_ops *dma_ops = get_dma_ops(dev);

	/*
	 * For PCI devices with 64-bit DMA addressing capability, promote
	 * the dma_ops to hybrid, with the consistent memory DMA space limited
	 * to 32-bit. For 32-bit capable devices, limit the streaming DMA
	 * address range to max_direct_dma_addr.
	 */
	if (dma_ops == gx_pci_dma_map_ops ||
	    dma_ops == gx_hybrid_pci_dma_map_ops ||
	    dma_ops == gx_legacy_pci_dma_map_ops) {
		if (mask == DMA_BIT_MASK(64) &&
		    dma_ops == gx_legacy_pci_dma_map_ops)
			set_dma_ops(dev, gx_hybrid_pci_dma_map_ops);
		else if (mask > dev->archdata.max_direct_dma_addr)
			mask = dev->archdata.max_direct_dma_addr;
	}

	if (!dev->dma_mask || !dma_supported(dev, mask))
		return -EIO;

	*dev->dma_mask = mask;

	return 0;
}
EXPORT_SYMBOL(dma_set_mask);
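/*
 * Typical driver-side use, for illustration only (the pci_dev name is
 * hypothetical):
 *
 *	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
 *		if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
 *			return -EIO;
 *
 * A 64-bit-capable device on the legacy (swiotlb) ops is promoted to the
 * hybrid ops above; a 32-bit request is clamped to max_direct_dma_addr.
 */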
#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
	struct dma_map_ops *dma_ops = get_dma_ops(dev);

	/*
	 * For PCI devices with 64-bit DMA addressing capability, promote
	 * the dma_ops to full capability for both streams and consistent
	 * memory access. For 32-bit capable devices, limit the consistent
	 * memory DMA range to max_direct_dma_addr.
	 */
	if (dma_ops == gx_pci_dma_map_ops ||
	    dma_ops == gx_hybrid_pci_dma_map_ops ||
	    dma_ops == gx_legacy_pci_dma_map_ops) {
		if (mask == DMA_BIT_MASK(64))
			set_dma_ops(dev, gx_pci_dma_map_ops);
		else if (mask > dev->archdata.max_direct_dma_addr)
			mask = dev->archdata.max_direct_dma_addr;
	}

	if (!dma_supported(dev, mask))
		return -EIO;
	dev->coherent_dma_mask = mask;
	return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif
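/*
 * Illustration only: a 64-bit-capable driver raises both masks, which
 * walks through both helpers above and ends up on the full-capability
 * gx_pci_dma_map_ops (the pci_dev name is hypothetical):
 *
 *	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) ||
 *	    dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)))
 *		return -EIO;
 */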
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
/*
 * The generic dma_get_required_mask() uses the highest physical address
 * (max_pfn) to provide the hint to the PCI drivers regarding 32-bit or
 * 64-bit DMA configuration. Since TILE-Gx has an I/O TLB/MMU, allowing
 * DMAs to use the full 64-bit PCI address space rather than being limited
 * by the physical memory space, we always let the PCI devices use
 * 64-bit DMA if they have that capability, by returning the 64-bit
 * DMA mask here. The device driver has the option to use 32-bit DMA if
 * the device is not capable of 64-bit DMA.
 */
u64 dma_get_required_mask(struct device *dev)
{
	return DMA_BIT_MASK(64);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
#endif