// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-map-ops.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

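/* Do a global RPCIT, i.e. refresh the translations for the device's whole DMA range. */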
static int zpci_refresh_global(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
				  zdev->iommu_pages * PAGE_SIZE);
}

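/*
 * Allocate a region/segment table from its kmem cache and mark all
 * entries invalid.
 */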
unsigned long *dma_alloc_cpu_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, sto);
		validate_rt_entry(entry);
		entry_clr_protected(entry);
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, pto);
		validate_st_entry(entry);
		entry_clr_protected(entry);
	}
	return pto;
}

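/*
 * Walk the translation tables for dma_addr, creating missing segment and
 * page tables on the way, and return a pointer to the page-table entry
 * (or NULL if a table allocation failed).
 */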
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx]);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx]);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

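/*
 * Update a single page-table entry: either invalidate it or point it at
 * page_addr, and set or clear write protection according to flags.
 */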
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
	else
		entry_clr_protected(entry);
}

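/*
 * Update the CPU-visible translation tables for a range of size bytes at
 * pa / dma_addr. If updating an entry fails part-way, the entries that
 * were already validated are invalidated again.
 */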
static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			      dma_addr_t dma_addr, size_t size, int flags)
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto out_unlock;
	}

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
		if (!entry) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

undo_cpu_trans:
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
		flags = ZPCI_PTE_INVALID;
		while (i-- > 0) {
			page_addr -= PAGE_SIZE;
			dma_addr -= PAGE_SIZE;
			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
			if (!entry)
				break;
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}
out_unlock:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

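/*
 * Refresh the I/O TLB for the given range if required; see the comment in
 * the function body for when the RPCIT can be skipped.
 */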
static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
			   size_t size, int flags)
{
	unsigned long irqflags;
	int ret;

	/*
	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
	 * translations when previously invalid translation-table entries are
	 * validated. With lazy unmap, rpcit is skipped for previously valid
	 * entries, but a global rpcit is then required before any address can
	 * be re-used, i.e. after each iommu bitmap wrap-around.
	 */
	if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
		if (!zdev->tlb_refresh)
			return 0;
	} else {
		if (!s390_iommu_strict)
			return 0;
	}

	ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
				 PAGE_ALIGN(size));
	if (ret == -ENOMEM && !s390_iommu_strict) {
		/* enable the hypervisor to free some resources */
		if (zpci_refresh_global(zdev))
			goto out;

		spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
		bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
			      zdev->lazy_bitmap, zdev->iommu_pages);
		bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
		spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
		ret = 0;
	}
out:
	return ret;
}

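/*
 * Update the translation tables and purge the I/O TLB. If the flush fails
 * while establishing a new mapping, the table update is rolled back.
 */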
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			    dma_addr_t dma_addr, size_t size, int flags)
{
	int rc;

	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (rc)
		return rc;

	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);

	return rc;
}

void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

static unsigned long __dma_alloc_iommu(struct device *dev,
				       unsigned long start, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
				start, size, zdev->start_dma >> PAGE_SHIFT,
				dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
				0);
}

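/*
 * Allocate a DMA address range from the iommu bitmap. In lazy (non-strict)
 * mode, a wrap-around of the allocation cursor triggers a global flush
 * before lazily freed addresses are handed out again.
 */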
static dma_addr_t dma_alloc_address(struct device *dev, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long offset, flags;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
	if (offset == -1) {
		if (!s390_iommu_strict) {
			/* global flush before DMA addresses are reused */
			if (zpci_refresh_global(zdev))
				goto out_error;

			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
				      zdev->lazy_bitmap, zdev->iommu_pages);
			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
		}
		/* wrap-around */
		offset = __dma_alloc_iommu(dev, 0, size);
		if (offset == -1)
			goto out_error;
	}
	zdev->next_bit = offset + size;
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);

	return zdev->start_dma + offset * PAGE_SIZE;

out_error:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
	return DMA_MAPPING_ERROR;
}

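/*
 * Return a DMA address range to the iommu bitmap. In lazy mode the range is
 * only recorded in the lazy bitmap and becomes reusable after the next
 * global flush.
 */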
static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long flags, offset;

	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	if (!zdev->iommu_bitmap)
		goto out;

	if (s390_iommu_strict)
		bitmap_clear(zdev->iommu_bitmap, offset, size);
	else
		bitmap_set(zdev->lazy_bitmap, offset, size);

out:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
	struct {
		unsigned long rc;
		unsigned long addr;
	} __packed data = {rc, addr};

	zpci_err_hex(&data, sizeof(data));
}

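/* .map_page callback: map size bytes at page + offset for DMA */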
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction,
				     unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long pa = page_to_phys(page) + offset;
	int flags = ZPCI_PTE_VALID;
	unsigned long nr_pages;
	dma_addr_t dma_addr;
	int ret;

	/* This rounds up number of pages based on size and offset */
	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
	dma_addr = dma_alloc_address(dev, nr_pages);
	if (dma_addr == DMA_MAPPING_ERROR) {
		ret = -ENOSPC;
		goto out_err;
	}

	/* Use rounded up size */
	size = nr_pages * PAGE_SIZE;

	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (ret)
		goto out_free;

	atomic64_add(nr_pages, &zdev->mapped_pages);
	return dma_addr + (offset & ~PAGE_MASK);

out_free:
	dma_free_address(dev, dma_addr, nr_pages);
out_err:
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return DMA_MAPPING_ERROR;
}

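/* .unmap_page callback: tear down a mapping created by s390_dma_map_pages() */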
static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction direction,
				 unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	int npages, ret;

	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr = dma_addr & PAGE_MASK;
	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
			       ZPCI_PTE_INVALID);
	if (ret) {
		zpci_err("unmap error:\n");
		zpci_err_dma(ret, dma_addr);
		return;
	}

	atomic64_add(npages, &zdev->unmapped_pages);
	dma_free_address(dev, dma_addr, npages);
}

static void *s390_dma_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t flag,
			    unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	struct page *page;
	unsigned long pa;
	dma_addr_t map;

	size = PAGE_ALIGN(size);
	page = alloc_pages(flag | __GFP_ZERO, get_order(size));
	if (!page)
		return NULL;

	pa = page_to_phys(page);
	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, map)) {
		free_pages(pa, get_order(size));
		return NULL;
	}

	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
	if (dma_handle)
		*dma_handle = map;
	return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
			  void *pa, dma_addr_t dma_handle,
			  unsigned long attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	size = PAGE_ALIGN(size);
	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
	free_pages((unsigned long) pa, get_order(size));
}

/* Map a segment into a contiguous dma address area */
static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			     size_t size, dma_addr_t *handle,
			     enum dma_data_direction dir)
{
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	dma_addr_t dma_addr_base, dma_addr;
	int flags = ZPCI_PTE_VALID;
	struct scatterlist *s;
	unsigned long pa = 0;
	int ret;

	dma_addr_base = dma_alloc_address(dev, nr_pages);
	if (dma_addr_base == DMA_MAPPING_ERROR)
		return -ENOMEM;

	dma_addr = dma_addr_base;
	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
		pa = page_to_phys(sg_page(s));
		ret = __dma_update_trans(zdev, pa, dma_addr,
					 s->offset + s->length, flags);
		if (ret)
			goto unmap;

		dma_addr += s->offset + s->length;
	}
	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
	if (ret)
		goto unmap;

	*handle = dma_addr_base;
	atomic64_add(nr_pages, &zdev->mapped_pages);

	return ret;

unmap:
	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
			 ZPCI_PTE_INVALID);
	dma_free_address(dev, dma_addr_base, nr_pages);
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return ret;
}

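/*
 * .map_sg callback: coalesce scatterlist elements into as few contiguous
 * DMA address ranges as possible; returns the number of mapped segments,
 * or 0 on error.
 */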
static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			   int nr_elements, enum dma_data_direction dir,
			   unsigned long attrs)
{
	struct scatterlist *s = sg, *start = sg, *dma = sg;
	unsigned int max = dma_get_max_seg_size(dev);
	unsigned int size = s->offset + s->length;
	unsigned int offset = s->offset;
	int count = 0, i;

	for (i = 1; i < nr_elements; i++) {
		s = sg_next(s);

		s->dma_address = DMA_MAPPING_ERROR;
		s->dma_length = 0;

		if (s->offset || (size & ~PAGE_MASK) ||
		    size + s->length > max) {
			if (__s390_dma_map_sg(dev, start, size,
					      &dma->dma_address, dir))
				goto unmap;

			dma->dma_address += offset;
			dma->dma_length = size - offset;

			size = offset = s->offset;
			start = s;
			dma = sg_next(dma);
			count++;
		}
		size += s->length;
	}
	if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
		goto unmap;

	dma->dma_address += offset;
	dma->dma_length = size - offset;

	return count + 1;
unmap:
	for_each_sg(sg, s, count, i)
		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
				     dir, attrs);

	return 0;
}

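/* .unmap_sg callback: undo the mappings created by s390_dma_map_sg() */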
static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			      int nr_elements, enum dma_data_direction dir,
			      unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		if (s->dma_length)
			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
					     dir, attrs);
		s->dma_address = 0;
		s->dma_length = 0;
	}
}

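/*
 * Set up the DMA translation table and iommu bitmap(s) for a zPCI device
 * and register the translation table with the hardware.
 */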
int zpci_dma_init_device(struct zpci_dev *zdev)
{
	int rc;

	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
		rc = -ENOMEM;
		goto out;
	}

	/*
	 * Restrict the iommu bitmap size to the minimum of the following:
	 * - main memory size
	 * - 3-level pagetable address limit minus start_dma offset
	 * - DMA address range allowed by the hardware (clp query pci fn)
	 *
	 * Also set zdev->end_dma to the actual end address of the usable
	 * range, instead of the theoretical maximum as reported by hardware.
	 */
	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
	zdev->iommu_size = min3((u64) high_memory,
				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
				zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
	if (!zdev->iommu_bitmap) {
		rc = -ENOMEM;
		goto free_dma_table;
	}
	if (!s390_iommu_strict) {
		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
		if (!zdev->lazy_bitmap) {
			rc = -ENOMEM;
			goto free_bitmap;
		}
	}
	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				(u64) zdev->dma_table);
	if (rc)
		goto free_bitmap;

	return 0;
free_bitmap:
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	vfree(zdev->lazy_bitmap);
	zdev->lazy_bitmap = NULL;
free_dma_table:
	dma_free_cpu_table(zdev->dma_table);
	zdev->dma_table = NULL;
out:
	return rc;
}

void zpci_dma_exit_device(struct zpci_dev *zdev)
{
	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	if (zpci_unregister_ioat(zdev, 0))
		return;

	dma_cleanup_tables(zdev->dma_table);
	zdev->dma_table = NULL;
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	vfree(zdev->lazy_bitmap);
	zdev->lazy_bitmap = NULL;
	zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
					0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
					0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

int __init zpci_dma_init(void)
{
	return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
	kmem_cache_destroy(dma_page_table_cache);
	kmem_cache_destroy(dma_region_table_cache);
}

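/* dma_map_ops used for s390 PCI devices */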
const struct dma_map_ops s390_pci_dma_ops = {
	.alloc		= s390_dma_alloc,
	.free		= s390_dma_free,
	.map_sg		= s390_dma_map_sg,
	.unmap_sg	= s390_dma_unmap_sg,
	.map_page	= s390_dma_map_pages,
	.unmap_page	= s390_dma_unmap_pages,
	.mmap		= dma_common_mmap,
	.get_sgtable	= dma_common_get_sgtable,
	.alloc_pages	= dma_common_alloc_pages,
	.free_pages	= dma_common_free_pages,
	/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);

static int __init s390_iommu_setup(char *str)
{
	if (!strcmp(str, "strict"))
		s390_iommu_strict = 1;
	return 1;
}

__setup("s390_iommu=", s390_iommu_setup);