/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>
/* Generic DMA mapping functions: */
/*
 * Allocate what Linux calls "coherent" memory.  On TILEPro this is
 * uncached memory; on TILE-Gx it is hash-for-home memory.
 */
#ifdef __tilepro__
#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
#else
#define PAGE_HOME_DMA PAGE_HOME_HASH
#endif
static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
                                     dma_addr_t *dma_handle, gfp_t gfp,
                                     unsigned long attrs)
{
        u64 dma_mask = (dev && dev->coherent_dma_mask) ?
                dev->coherent_dma_mask : DMA_BIT_MASK(32);
        int node = dev ? dev_to_node(dev) : 0;
        int order = get_order(size);
        struct page *pg;
        dma_addr_t addr;

        gfp |= __GFP_ZERO;

        /*
         * If the mask specifies that the memory be in the first 4 GB, then
         * we force the allocation to come from the DMA zone.  We also
         * force the node to 0 since that's the only node where the DMA
         * zone isn't empty.  If the mask size is smaller than 32 bits, we
         * may still not be able to guarantee a suitable memory address, in
         * which case we will return NULL.  But such devices are uncommon.
         */
        if (dma_mask <= DMA_BIT_MASK(32)) {
                gfp |= GFP_DMA32;
                node = 0;
        }

        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
        if (pg == NULL)
                return NULL;

        addr = page_to_phys(pg);
        if (addr + size > dma_mask) {
                __homecache_free_pages(pg, order);
                return NULL;
        }

        *dma_handle = addr;

        return page_address(pg);
}
/*
 * Free memory that was allocated with tile_dma_alloc_coherent.
 */
static void tile_dma_free_coherent(struct device *dev, size_t size,
                                   void *vaddr, dma_addr_t dma_handle,
                                   unsigned long attrs)
{
        homecache_free_pages((unsigned long)vaddr, get_order(size));
}
/*
 * The map routines "map" the specified address range for DMA
 * accesses.  The memory belongs to the device after this call is
 * issued, until it is unmapped with dma_unmap_single.
 *
 * We don't need to do any mapping; we just flush the address range
 * out of the cache and return a DMA address.
 *
 * The unmap routines do whatever is necessary before the processor
 * accesses the memory again, and must be called before the driver
 * touches the memory.  We can get away with a cache invalidate if we
 * can count on nothing having been touched.
 */
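
/*
 * Illustrative driver-side use of these ops through the generic DMA API
 * (a sketch only, not code in this file; "buf" and "len" are hypothetical):
 *
 *	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
 *	... let the device DMA into buf ...
 *	dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
 *	... now the cpu may safely read buf ...
 */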
/* Set up a single page for DMA access. */
static void __dma_prep_page(struct page *page, unsigned long offset,
                            size_t size, enum dma_data_direction direction)
{
        /*
         * Flush the page from cache if necessary.
         * On tilegx, data is delivered to hash-for-home L3; on tilepro,
         * data is delivered direct to memory.
         *
         * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
         * this to be a "flush" not a "finv" and keep some of the
         * state in cache across the DMA operation, but it doesn't seem
         * worth creating the necessary flush_buffer_xxx() infrastructure.
         */
        int home = page_home(page);

        switch (home) {
        case PAGE_HOME_HASH:
#ifdef __tilegx__
                return;
#endif
                break;
        case PAGE_HOME_UNCACHED:
#ifdef __tilepro__
                return;
#endif
                break;
        case PAGE_HOME_IMMUTABLE:
                /* Should be going to the device only. */
                BUG_ON(direction == DMA_FROM_DEVICE ||
                       direction == DMA_BIDIRECTIONAL);
                return;
        case PAGE_HOME_INCOHERENT:
                /* Incoherent anyway, so no need to work hard here. */
                return;
        default:
                BUG_ON(home < 0 || home >= NR_CPUS);
                break;
        }
        homecache_finv_page(page);

#ifdef DEBUG_ALIGNMENT
        /* Warn if the region isn't cacheline aligned. */
        if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
                pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
                        PFN_PHYS(page_to_pfn(page)) + offset, size);
#endif
}
/* Make the page ready to be read by the core. */
static void __dma_complete_page(struct page *page, unsigned long offset,
                                size_t size, enum dma_data_direction direction)
{
#ifdef __tilegx__
        switch (page_home(page)) {
        case PAGE_HOME_HASH:
                /* I/O device delivered data the way the cpu wanted it. */
                break;
        case PAGE_HOME_INCOHERENT:
                /* Incoherent anyway, so no need to work hard here. */
                break;
        case PAGE_HOME_IMMUTABLE:
                /* Extra read-only copies are not a problem. */
                break;
        default:
                /* Flush the bogus hash-for-home I/O entries to memory. */
                homecache_finv_map_page(page, PAGE_HOME_HASH);
                break;
        }
#endif
}
static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
                                enum dma_data_direction direction)
{
        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

        while (size != 0) {
                __dma_prep_page(page, offset, bytes, direction);
                size -= bytes;
                ++page;
                offset = 0;
                bytes = min((size_t)PAGE_SIZE, size);
        }
}
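
/*
 * Complete DMA on an arbitrary physical-address range, again one page
 * at a time.
 */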
static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
                                    enum dma_data_direction direction)
{
        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

        while (size != 0) {
                __dma_complete_page(page, offset, bytes, direction);
                size -= bytes;
                ++page;
                offset = 0;
                bytes = min((size_t)PAGE_SIZE, size);
        }
}
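
/*
 * Scatter-gather map/unmap: each segment's DMA address is simply its
 * physical address, flushed or invalidated with the helpers above.
 */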
static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
                           int nents, enum dma_data_direction direction,
                           unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));

        WARN_ON(nents == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
                sg->dma_length = sg->length;
#endif
                if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
                        continue;
                __dma_prep_pa_range(sg->dma_address, sg->length, direction);
        }

        return nents;
}
static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                              int nents, enum dma_data_direction direction,
                              unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
                        continue;
                __dma_complete_pa_range(sg->dma_address, sg->length,
                                        direction);
        }
}
static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
                                    unsigned long offset, size_t size,
                                    enum dma_data_direction direction,
                                    unsigned long attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        BUG_ON(offset + size > PAGE_SIZE);
        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                __dma_prep_page(page, offset, size, direction);

        return page_to_pa(page) + offset;
}
static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
                                size_t size, enum dma_data_direction direction,
                                unsigned long attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
                return;

        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
                            dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_dma_sync_single_for_cpu(struct device *dev,
                                         dma_addr_t dma_handle,
                                         size_t size,
                                         enum dma_data_direction direction)
{
        BUG_ON(!valid_dma_direction(direction));

        __dma_complete_pa_range(dma_handle, size, direction);
}
static void tile_dma_sync_single_for_device(struct device *dev,
                                            dma_addr_t dma_handle, size_t size,
                                            enum dma_data_direction direction)
{
        __dma_prep_pa_range(dma_handle, size, direction);
}
static void tile_dma_sync_sg_for_cpu(struct device *dev,
                                     struct scatterlist *sglist, int nelems,
                                     enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_cpu(dev, sg->dma_address,
                                        sg_dma_len(sg), direction);
        }
}
static void tile_dma_sync_sg_for_device(struct device *dev,
                                        struct scatterlist *sglist, int nelems,
                                        enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_device(dev, sg->dma_address,
                                           sg_dma_len(sg), direction);
        }
}
static const struct dma_map_ops tile_default_dma_map_ops = {
        .alloc = tile_dma_alloc_coherent,
        .free = tile_dma_free_coherent,
        .map_page = tile_dma_map_page,
        .unmap_page = tile_dma_unmap_page,
        .map_sg = tile_dma_map_sg,
        .unmap_sg = tile_dma_unmap_sg,
        .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_dma_sync_sg_for_device,
};
const struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
EXPORT_SYMBOL(tile_dma_map_ops);
/* Generic PCI DMA mapping functions */
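
/*
 * These mirror the generic ops above, but add get_dma_offset(dev) to the
 * returned handles (and subtract it again on unmap/sync) so the device
 * sees bus addresses rather than raw physical addresses.
 */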
static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
                                         dma_addr_t *dma_handle, gfp_t gfp,
                                         unsigned long attrs)
{
        int node = dev_to_node(dev);
        int order = get_order(size);
        struct page *pg;
        dma_addr_t addr;

        gfp |= __GFP_ZERO;

        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
        if (pg == NULL)
                return NULL;

        addr = page_to_phys(pg);

        *dma_handle = addr + get_dma_offset(dev);

        return page_address(pg);
}
/*
 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
 */
static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
                                       void *vaddr, dma_addr_t dma_handle,
                                       unsigned long attrs)
{
        homecache_free_pages((unsigned long)vaddr, get_order(size));
}
static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
                               int nents, enum dma_data_direction direction,
                               unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));

        WARN_ON(nents == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_prep_pa_range(sg->dma_address, sg->length, direction);

                sg->dma_address = sg->dma_address + get_dma_offset(dev);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
                sg->dma_length = sg->length;
#endif
        }

        return nents;
}
static void tile_pci_dma_unmap_sg(struct device *dev,
                                  struct scatterlist *sglist, int nents,
                                  enum dma_data_direction direction,
                                  unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_complete_pa_range(sg->dma_address, sg->length,
                                        direction);
        }
}
static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
                                        unsigned long offset, size_t size,
                                        enum dma_data_direction direction,
                                        unsigned long attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        BUG_ON(offset + size > PAGE_SIZE);
        __dma_prep_page(page, offset, size, direction);

        return page_to_pa(page) + offset + get_dma_offset(dev);
}
static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
                                    size_t size,
                                    enum dma_data_direction direction,
                                    unsigned long attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        dma_address -= get_dma_offset(dev);

        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
                            dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
                                             dma_addr_t dma_handle,
                                             size_t size,
                                             enum dma_data_direction direction)
{
        BUG_ON(!valid_dma_direction(direction));

        dma_handle -= get_dma_offset(dev);

        __dma_complete_pa_range(dma_handle, size, direction);
}
static void tile_pci_dma_sync_single_for_device(struct device *dev,
                                                dma_addr_t dma_handle,
                                                size_t size,
                                                enum dma_data_direction direction)
{
        dma_handle -= get_dma_offset(dev);

        __dma_prep_pa_range(dma_handle, size, direction);
}
static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
                                         struct scatterlist *sglist,
                                         int nelems,
                                         enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_cpu(dev, sg->dma_address,
                                        sg_dma_len(sg), direction);
        }
}
static void tile_pci_dma_sync_sg_for_device(struct device *dev,
                                            struct scatterlist *sglist,
                                            int nelems,
                                            enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_device(dev, sg->dma_address,
                                           sg_dma_len(sg), direction);
        }
}
static const struct dma_map_ops tile_pci_default_dma_map_ops = {
        .alloc = tile_pci_dma_alloc_coherent,
        .free = tile_pci_dma_free_coherent,
        .map_page = tile_pci_dma_map_page,
        .unmap_page = tile_pci_dma_unmap_page,
        .map_sg = tile_pci_dma_map_sg,
        .unmap_sg = tile_pci_dma_unmap_sg,
        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
};
const struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
EXPORT_SYMBOL(gx_pci_dma_map_ops);
/* PCI DMA mapping functions for legacy PCI devices */
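
/*
 * Legacy 32-bit devices go through swiotlb bounce buffers; the "hybrid"
 * ops below use swiotlb only for coherent allocations and the direct PCI
 * ops for streaming DMA.
 */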
#ifdef CONFIG_SWIOTLB
static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
                                         dma_addr_t *dma_handle, gfp_t gfp,
                                         unsigned long attrs)
{
        gfp |= GFP_DMA32;
        return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
}
static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
                                       void *vaddr, dma_addr_t dma_addr,
                                       unsigned long attrs)
{
        swiotlb_free_coherent(dev, size, vaddr, dma_addr);
}
static const struct dma_map_ops pci_swiotlb_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = swiotlb_map_page,
        .unmap_page = swiotlb_unmap_page,
        .map_sg = swiotlb_map_sg_attrs,
        .unmap_sg = swiotlb_unmap_sg_attrs,
        .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
        .sync_single_for_device = swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = swiotlb_sync_sg_for_device,
        .dma_supported = swiotlb_dma_supported,
        .mapping_error = swiotlb_dma_mapping_error,
};
static const struct dma_map_ops pci_hybrid_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = tile_pci_dma_map_page,
        .unmap_page = tile_pci_dma_unmap_page,
        .map_sg = tile_pci_dma_map_sg,
        .unmap_sg = tile_pci_dma_unmap_sg,
        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
};
const struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
const struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
#else
const struct dma_map_ops *gx_legacy_pci_dma_map_ops;
const struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
#endif
EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);
int dma_set_mask(struct device *dev, u64 mask)
{
        const struct dma_map_ops *dma_ops = get_dma_ops(dev);

        /*
         * For PCI devices with 64-bit DMA addressing capability, promote
         * the dma_ops to hybrid, with the consistent memory DMA space limited
         * to 32-bit.  For 32-bit capable devices, limit the streaming DMA
         * address range to max_direct_dma_addr.
         */
        if (dma_ops == gx_pci_dma_map_ops ||
            dma_ops == gx_hybrid_pci_dma_map_ops ||
            dma_ops == gx_legacy_pci_dma_map_ops) {
                if (mask == DMA_BIT_MASK(64) &&
                    dma_ops == gx_legacy_pci_dma_map_ops)
                        set_dma_ops(dev, gx_hybrid_pci_dma_map_ops);
                else if (mask > dev->archdata.max_direct_dma_addr)
                        mask = dev->archdata.max_direct_dma_addr;
        }

        if (!dev->dma_mask || !dma_supported(dev, mask))
                return -EIO;

        *dev->dma_mask = mask;

        return 0;
}
EXPORT_SYMBOL(dma_set_mask);
#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
        const struct dma_map_ops *dma_ops = get_dma_ops(dev);

        /*
         * For PCI devices with 64-bit DMA addressing capability, promote
         * the dma_ops to full capability for both streaming and consistent
         * memory access.  For 32-bit capable devices, limit the consistent
         * memory DMA range to max_direct_dma_addr.
         */
        if (dma_ops == gx_pci_dma_map_ops ||
            dma_ops == gx_hybrid_pci_dma_map_ops ||
            dma_ops == gx_legacy_pci_dma_map_ops) {
                if (mask == DMA_BIT_MASK(64))
                        set_dma_ops(dev, gx_pci_dma_map_ops);
                else if (mask > dev->archdata.max_direct_dma_addr)
                        mask = dev->archdata.max_direct_dma_addr;
        }

        if (!dma_supported(dev, mask))
                return -EIO;
        dev->coherent_dma_mask = mask;
        return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif
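
/*
 * Typical driver usage of the two setters above (illustrative only; "pdev"
 * is a hypothetical struct pci_dev, not something defined in this file):
 *
 *	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
 *		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 */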
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
/*
 * The generic dma_get_required_mask() uses the highest physical address
 * (max_pfn) as the hint to PCI drivers for choosing a 32-bit or 64-bit
 * DMA configuration.  Since TILE-Gx has an I/O TLB/MMU that lets DMA use
 * the full 64-bit PCI address space rather than being limited by physical
 * memory, we always let PCI devices use 64-bit DMA if they are capable of
 * it, by returning a 64-bit mask here.  A driver may still choose 32-bit
 * DMA if its device cannot do 64-bit DMA.
 */
u64 dma_get_required_mask(struct device *dev)
{
        return DMA_BIT_MASK(64);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
#endif