// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>
#define S390_MAPPING_ERROR	(~(dma_addr_t) 0x0)

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;
static int zpci_refresh_global(struct zpci_dev *zdev)
{
        return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
                                  zdev->iommu_pages * PAGE_SIZE);
}
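
/*
 * Allocate a region/segment table from the dedicated kmem cache and mark
 * every entry invalid so the table walker can detect missing lower levels.
 */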
unsigned long *dma_alloc_cpu_table(void)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
                *entry = ZPCI_TABLE_INVALID;
        return table;
}

static void dma_free_cpu_table(void *table)
{
        kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
                *entry = ZPCI_PTE_INVALID;
        return table;
}

static void dma_free_page_table(void *table)
{
        kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
        unsigned long *sto;

        if (reg_entry_isvalid(*entry))
                sto = get_rt_sto(*entry);
        else {
                sto = dma_alloc_cpu_table();
                if (!sto)
                        return NULL;

                set_rt_sto(entry, sto);
                validate_rt_entry(entry);
                entry_clr_protected(entry);
        }
        return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
        unsigned long *pto;

        if (reg_entry_isvalid(*entry))
                pto = get_st_pto(*entry);
        else {
                pto = dma_alloc_page_table();
                if (!pto)
                        return NULL;

                set_st_pto(entry, pto);
                validate_st_entry(entry);
                entry_clr_protected(entry);
        }
        return pto;
}
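
/*
 * Walk the three-level translation table for dma_addr: region table ->
 * segment table -> page table. Missing intermediate tables are allocated
 * on demand; the return value is a pointer to the page-table entry, or
 * NULL if an allocation failed.
 */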
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
        unsigned long *sto, *pto;
        unsigned int rtx, sx, px;

        rtx = calc_rtx(dma_addr);
        sto = dma_get_seg_table_origin(&rto[rtx]);
        if (!sto)
                return NULL;

        sx = calc_sx(dma_addr);
        pto = dma_get_page_table_origin(&sto[sx]);
        if (!pto)
                return NULL;

        px = calc_px(dma_addr);
        return &pto[px];
}
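
/*
 * Update a single page-table entry: either invalidate it or point it at
 * page_addr, then set or clear the protection bit according to flags.
 */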
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
        if (flags & ZPCI_PTE_INVALID) {
                invalidate_pt_entry(entry);
        } else {
                set_pt_pfaa(entry, page_addr);
                validate_pt_entry(entry);
        }

        if (flags & ZPCI_TABLE_PROTECTED)
                entry_set_protected(entry);
        else
                entry_clr_protected(entry);
}
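
/*
 * Update the translation entries for a whole [dma_addr, dma_addr + size)
 * range under dma_table_lock. If a table allocation fails part-way through
 * a mapping request, the entries written so far are invalidated again.
 */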
static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
                              dma_addr_t dma_addr, size_t size, int flags)
{
        unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
        u8 *page_addr = (u8 *) (pa & PAGE_MASK);
        unsigned long irq_flags;
        unsigned long *entry;
        int i, rc = 0;

        if (!nr_pages)
                return -EINVAL;

        spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
        if (!zdev->dma_table) {
                rc = -EINVAL;
                goto out_unlock;
        }

        for (i = 0; i < nr_pages; i++) {
                entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
                if (!entry) {
                        rc = -ENOMEM;
                        goto undo_cpu_trans;
                }
                dma_update_cpu_trans(entry, page_addr, flags);
                page_addr += PAGE_SIZE;
                dma_addr += PAGE_SIZE;
        }

undo_cpu_trans:
        if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
                flags = ZPCI_PTE_INVALID;
                while (i-- > 0) {
                        page_addr -= PAGE_SIZE;
                        dma_addr -= PAGE_SIZE;
                        entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
                        if (!entry)
                                break;
                        dma_update_cpu_trans(entry, page_addr, flags);
                }
        }
out_unlock:
        spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
        return rc;
}

static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
                           size_t size, int flags)
{
        unsigned long irqflags;
        int ret;

        /*
         * With zdev->tlb_refresh == 0, rpcit is not required to establish new
         * translations when previously invalid translation-table entries are
         * validated. With lazy unmap, rpcit is skipped for previously valid
         * entries, but a global rpcit is then required before any address can
         * be re-used, i.e. after each iommu bitmap wrap-around.
         */
        if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
                if (!zdev->tlb_refresh)
                        return 0;
        } else {
                if (!s390_iommu_strict)
                        return 0;
        }

        ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
                                 PAGE_ALIGN(size));
        if (ret == -ENOMEM && !s390_iommu_strict) {
                /* enable the hypervisor to free some resources */
                if (zpci_refresh_global(zdev))
                        goto out;

                spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
                bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
                              zdev->lazy_bitmap, zdev->iommu_pages);
                bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
                spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
                ret = 0;
        }
out:
        return ret;
}
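
/*
 * Combine a CPU-side table update with the required IOTLB purge. If the
 * purge fails for a mapping request, the newly written entries are
 * invalidated again and the error is returned.
 */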
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
                            dma_addr_t dma_addr, size_t size, int flags)
{
        int rc;

        rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
        if (rc)
                return rc;

        rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
        if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
                __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);

        return rc;
}

void dma_free_seg_table(unsigned long entry)
{
        unsigned long *sto = get_rt_sto(entry);
        int sx;

        for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
                if (reg_entry_isvalid(sto[sx]))
                        dma_free_page_table(get_st_pto(sto[sx]));

        dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
        int rtx;

        if (!table)
                return;

        for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
                if (reg_entry_isvalid(table[rtx]))
                        dma_free_seg_table(table[rtx]);

        dma_free_cpu_table(table);
}

static unsigned long __dma_alloc_iommu(struct device *dev,
                                       unsigned long start, int size)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long boundary_size;

        boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
                              PAGE_SIZE) >> PAGE_SHIFT;
        return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
                                start, size, zdev->start_dma >> PAGE_SHIFT,
                                boundary_size, 0);
}
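
/*
 * Allocate a range of iommu pages from the bitmap, starting the search at
 * the last allocation. In lazy (non-strict) mode a wrap-around triggers a
 * global TLB flush before lazily freed addresses are handed out again.
 */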
static dma_addr_t dma_alloc_address(struct device *dev, int size)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long offset, flags;

        spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
        offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
        if (offset == -1) {
                if (!s390_iommu_strict) {
                        /* global flush before DMA addresses are reused */
                        if (zpci_refresh_global(zdev))
                                goto out_error;

                        bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
                                      zdev->lazy_bitmap, zdev->iommu_pages);
                        bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
                }
                /* wrap-around */
                offset = __dma_alloc_iommu(dev, 0, size);
                if (offset == -1)
                        goto out_error;
        }
        zdev->next_bit = offset + size;
        spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);

        return zdev->start_dma + offset * PAGE_SIZE;

out_error:
        spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
        return S390_MAPPING_ERROR;
}

static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long flags, offset;

        offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;

        spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
        if (!zdev->iommu_bitmap)
                goto out;

        if (s390_iommu_strict)
                bitmap_clear(zdev->iommu_bitmap, offset, size);
        else
                bitmap_set(zdev->lazy_bitmap, offset, size);

out:
        spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
        struct {
                unsigned long rc;
                unsigned long addr;
        } __packed data = {rc, addr};

        zpci_err_hex(&data, sizeof(data));
}
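
/*
 * map_page callback: allocate a DMA address range, establish the
 * translation for the (page, offset, size) range and return the DMA
 * address adjusted by the in-page offset.
 */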
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction direction,
                                     unsigned long attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long pa = page_to_phys(page) + offset;
        int flags = ZPCI_PTE_VALID;
        unsigned long nr_pages;
        dma_addr_t dma_addr;
        int ret;

        /* This rounds up number of pages based on size and offset */
        nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
        dma_addr = dma_alloc_address(dev, nr_pages);
        if (dma_addr == S390_MAPPING_ERROR) {
                ret = -ENOSPC;
                goto out_err;
        }

        /* Use rounded up size */
        size = nr_pages * PAGE_SIZE;

        if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
                flags |= ZPCI_TABLE_PROTECTED;

        ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
        if (ret)
                goto out_free;

        atomic64_add(nr_pages, &zdev->mapped_pages);
        return dma_addr + (offset & ~PAGE_MASK);

out_free:
        dma_free_address(dev, dma_addr, nr_pages);
out_err:
        zpci_err("map error:\n");
        zpci_err_dma(ret, pa);
        return S390_MAPPING_ERROR;
}

static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
                                 size_t size, enum dma_data_direction direction,
                                 unsigned long attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        int npages, ret;

        npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
        dma_addr = dma_addr & PAGE_MASK;
        ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
                               ZPCI_PTE_INVALID);
        if (ret) {
                zpci_err("unmap error:\n");
                zpci_err_dma(ret, dma_addr);
                return;
        }

        atomic64_add(npages, &zdev->unmapped_pages);
        dma_free_address(dev, dma_addr, npages);
}

static void *s390_dma_alloc(struct device *dev, size_t size,
                            dma_addr_t *dma_handle, gfp_t flag,
                            unsigned long attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        struct page *page;
        unsigned long pa;
        dma_addr_t map;

        size = PAGE_ALIGN(size);
        page = alloc_pages(flag, get_order(size));
        if (!page)
                return NULL;

        pa = page_to_phys(page);
        map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
        if (dma_mapping_error(dev, map)) {
                free_pages(pa, get_order(size));
                return NULL;
        }

        atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
        if (dma_handle)
                *dma_handle = map;
        return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
                          void *pa, dma_addr_t dma_handle,
                          unsigned long attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

        size = PAGE_ALIGN(size);
        atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
        s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
        free_pages((unsigned long) pa, get_order(size));
}

/* Map a segment into a contiguous dma address area */
static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
                             size_t size, dma_addr_t *handle,
                             enum dma_data_direction dir)
{
        unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        dma_addr_t dma_addr_base, dma_addr;
        int flags = ZPCI_PTE_VALID;
        struct scatterlist *s;
        unsigned long pa = 0;
        int ret;

        dma_addr_base = dma_alloc_address(dev, nr_pages);
        if (dma_addr_base == S390_MAPPING_ERROR)
                return -ENOMEM;

        dma_addr = dma_addr_base;
        if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
                flags |= ZPCI_TABLE_PROTECTED;

        for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
                pa = page_to_phys(sg_page(s));
                ret = __dma_update_trans(zdev, pa, dma_addr,
                                         s->offset + s->length, flags);
                if (ret)
                        goto unmap;

                dma_addr += s->offset + s->length;
        }
        ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
        if (ret)
                goto unmap;

        *handle = dma_addr_base;
        atomic64_add(nr_pages, &zdev->mapped_pages);

        return ret;

unmap:
        dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
                         ZPCI_PTE_INVALID);
        dma_free_address(dev, dma_addr_base, nr_pages);
        zpci_err("map error:\n");
        zpci_err_dma(ret, pa);
        return ret;
}
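
/*
 * map_sg callback: coalesce scatterlist elements into as few contiguous
 * DMA address ranges as possible. A new range is started whenever an
 * element's offset, a non page-aligned size, or the maximum segment size
 * prevents merging. Returns the number of mapped DMA segments, 0 on error.
 */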
static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
                           int nr_elements, enum dma_data_direction dir,
                           unsigned long attrs)
{
        struct scatterlist *s = sg, *start = sg, *dma = sg;
        unsigned int max = dma_get_max_seg_size(dev);
        unsigned int size = s->offset + s->length;
        unsigned int offset = s->offset;
        int count = 0, i;

        for (i = 1; i < nr_elements; i++) {
                s = sg_next(s);

                s->dma_address = S390_MAPPING_ERROR;
                s->dma_length = 0;

                if (s->offset || (size & ~PAGE_MASK) ||
                    size + s->length > max) {
                        if (__s390_dma_map_sg(dev, start, size,
                                              &dma->dma_address, dir))
                                goto unmap;

                        dma->dma_address += offset;
                        dma->dma_length = size - offset;

                        size = offset = s->offset;
                        start = s;
                        dma = sg_next(dma);
                        count++;
                }
                size += s->length;
        }
        if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
                goto unmap;

        dma->dma_address += offset;
        dma->dma_length = size - offset;

        return count + 1;
unmap:
        for_each_sg(sg, s, count, i)
                s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
                                     dir, attrs);

        return 0;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
                              int nr_elements, enum dma_data_direction dir,
                              unsigned long attrs)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nr_elements, i) {
                if (s->dma_length)
                        s390_dma_unmap_pages(dev, s->dma_address,
                                             s->dma_length, dir, attrs);
                s->dma_address = 0;
                s->dma_length = 0;
        }
}

static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        return dma_addr == S390_MAPPING_ERROR;
}
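
/*
 * Set up DMA translation for one PCI function: allocate the root table,
 * size and allocate the iommu bitmap(s), and register the I/O address
 * translation (IOAT) parameters with the hardware.
 */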
int zpci_dma_init_device(struct zpci_dev *zdev)
{
        int rc;

        /*
         * At this point, if the device is part of an IOMMU domain, this would
         * be a strong hint towards a bug in the IOMMU API (common) code and/or
         * simultaneous access via IOMMU and DMA API. So let's issue a warning.
         */
        WARN_ON(zdev->s390_domain);

        spin_lock_init(&zdev->iommu_bitmap_lock);
        spin_lock_init(&zdev->dma_table_lock);

        zdev->dma_table = dma_alloc_cpu_table();
        if (!zdev->dma_table) {
                rc = -ENOMEM;
                goto out;
        }

        /*
         * Restrict the iommu bitmap size to the minimum of the following:
         * - main memory size
         * - 3-level pagetable address limit minus start_dma offset
         * - DMA address range allowed by the hardware (clp query pci fn)
         *
         * Also set zdev->end_dma to the actual end address of the usable
         * range, instead of the theoretical maximum as reported by hardware.
         */
        zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
        zdev->iommu_size = min3((u64) high_memory,
                                ZPCI_TABLE_SIZE_RT - zdev->start_dma,
                                zdev->end_dma - zdev->start_dma + 1);
        zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
        zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
        zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
        if (!zdev->iommu_bitmap) {
                rc = -ENOMEM;
                goto free_dma_table;
        }
        if (!s390_iommu_strict) {
                zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
                if (!zdev->lazy_bitmap) {
                        rc = -ENOMEM;
                        goto free_bitmap;
                }
        }
        rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
                                (u64) zdev->dma_table);
        if (rc)
                goto free_bitmap;

        return 0;
free_bitmap:
        vfree(zdev->iommu_bitmap);
        zdev->iommu_bitmap = NULL;
        vfree(zdev->lazy_bitmap);
        zdev->lazy_bitmap = NULL;
free_dma_table:
        dma_free_cpu_table(zdev->dma_table);
        zdev->dma_table = NULL;
out:
        return rc;
}

void zpci_dma_exit_device(struct zpci_dev *zdev)
{
        /*
         * At this point, if the device is part of an IOMMU domain, this would
         * be a strong hint towards a bug in the IOMMU API (common) code and/or
         * simultaneous access via IOMMU and DMA API. So let's issue a warning.
         */
        WARN_ON(zdev->s390_domain);

        if (zpci_unregister_ioat(zdev, 0))
                return;

        dma_cleanup_tables(zdev->dma_table);
        zdev->dma_table = NULL;
        vfree(zdev->iommu_bitmap);
        zdev->iommu_bitmap = NULL;
        vfree(zdev->lazy_bitmap);
        zdev->lazy_bitmap = NULL;

        zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
        dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
                                        ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
                                        0, NULL);
        if (!dma_region_table_cache)
                return -ENOMEM;

        dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
                                        ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
                                        0, NULL);
        if (!dma_page_table_cache) {
                kmem_cache_destroy(dma_region_table_cache);
                return -ENOMEM;
        }
        return 0;
}

int __init zpci_dma_init(void)
{
        return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
        kmem_cache_destroy(dma_page_table_cache);
        kmem_cache_destroy(dma_region_table_cache);
}

const struct dma_map_ops s390_pci_dma_ops = {
        .alloc          = s390_dma_alloc,
        .free           = s390_dma_free,
        .map_sg         = s390_dma_map_sg,
        .unmap_sg       = s390_dma_unmap_sg,
        .map_page       = s390_dma_map_pages,
        .unmap_page     = s390_dma_unmap_pages,
        .mapping_error  = s390_mapping_error,
        /* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);

static int __init s390_iommu_setup(char *str)
{
        if (!strncmp(str, "strict", 6))
                s390_iommu_strict = 1;
        return 0;
}

__setup("s390_iommu=", s390_iommu_setup);