// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

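/*
 * Each IOTLB entry added below carries a struct vdpa_map_file as its
 * opaque context: a pinned reference to the backing file (taken with
 * get_file()) plus an offset into it. The backing file can then be
 * looked up again for as long as the mapping exists, and the reference
 * is dropped when the range is removed.
 */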
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *map_file;
	int ret;

	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
	if (!map_file)
		return -ENOMEM;

	map_file->file = get_file(file);
	map_file->offset = offset;

	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, map_file);
	if (ret) {
		fput(map_file->file);
		kfree(map_file);
		return ret;
	}
	return 0;
}

static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;

	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(domain->iotlb, map);
	}
}

int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;
	int ret;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, start, last);

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		map_file = (struct vdpa_map_file *)map->opaque;
		ret = vduse_iotlb_add_range(domain, map->start, map->last,
					    map->addr, map->perm,
					    map_file->file,
					    map_file->offset);
		if (ret)
			goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	return 0;
err:
	vduse_iotlb_del_range(domain, start, last);
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;

	spin_lock(&domain->iotlb_lock);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		vduse_iotlb_del_range(domain, map->start, map->last);
	}
	spin_unlock(&domain->iotlb_lock);
}

static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				return -ENOMEM;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
	}
}

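/*
 * do_bounce() copies data between the original physical pages and a
 * buffer mapped from a bounce page: for DMA_TO_DEVICE the data flows
 * from the original pages into the buffer, otherwise from the buffer
 * back into the original pages. The copy is split at page boundaries
 * of the original physical address.
 */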
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);
	struct page *page;
	unsigned int sz = 0;

	while (size) {
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		page = pfn_to_page(pfn);
		if (dir == DMA_TO_DEVICE)
			memcpy_from_page(addr, page, offset, sz);
		else
			memcpy_to_page(page, offset, addr, sz);

		size -= sz;
		pfn++;
		addr += sz;
		offset = 0;
	}
}

static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	struct vduse_bounce_map *map;
	struct page *page;
	unsigned int offset;
	void *addr;
	size_t sz;

	if (iova >= domain->bounce_size)
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
			return;

		page = domain->user_bounce_pages ?
		       map->user_bounce_page : map->bounce_page;

		addr = kmap_local_page(page);
		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
		kunmap_local(addr);
		size -= sz;
		iova += sz;
	}
}

static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 start = iova & PAGE_MASK;
	u64 last = start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
	if (!map)
		goto out;

	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}

static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *map;
	struct page *page = NULL;

	read_lock(&domain->bounce_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (domain->user_bounce_pages || !map->bounce_page)
		goto out;

	page = map->bounce_page;
	get_page(page);
out:
	read_unlock(&domain->bounce_lock);

	return page;
}

static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (!map->bounce_page)
			continue;

		__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
}

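/*
 * Userspace may register its own pages to back the bounce buffer.
 * While user bounce pages are installed, vduse_domain_bounce() copies
 * through map->user_bounce_page instead of the kernel-allocated
 * bounce_page, and any data already staged in a kernel bounce page is
 * carried over to the corresponding user page when the pages are added.
 */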
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
{
	struct vduse_bounce_map *map;
	int i, ret;

	/* Now we don't support partial mapping */
	if (count != (domain->bounce_size >> PAGE_SHIFT))
		return -EINVAL;

	write_lock(&domain->bounce_lock);
	ret = -EEXIST;
	if (domain->user_bounce_pages)
		goto out;

	for (i = 0; i < count; i++) {
		map = &domain->bounce_maps[i];
		if (map->bounce_page) {
			/* Copy kernel page to user page if it's in use */
			if (map->orig_phys != INVALID_PHYS_ADDR)
				memcpy_to_page(pages[i], 0,
					       page_address(map->bounce_page),
					       PAGE_SIZE);
		}
		map->user_bounce_page = pages[i];
		get_page(pages[i]);
	}
	domain->user_bounce_pages = true;
	ret = 0;
out:
	write_unlock(&domain->bounce_lock);

	return ret;
}

void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long i, count;

	write_lock(&domain->bounce_lock);
	if (!domain->user_bounce_pages)
		goto out;

	count = domain->bounce_size >> PAGE_SHIFT;
	for (i = 0; i < count; i++) {
		struct page *page = NULL;

		map = &domain->bounce_maps[i];
		if (WARN_ON(!map->user_bounce_page))
			continue;

		/* Copy user page to kernel page if it's in use */
		if (map->orig_phys != INVALID_PHYS_ADDR) {
			page = map->bounce_page;
			memcpy_from_page(page_address(page),
					 map->user_bounce_page, 0, PAGE_SIZE);
		}
		put_page(map->user_bounce_page);
		map->user_bounce_page = NULL;
	}
	domain->user_bounce_pages = false;
out:
	write_unlock(&domain->bounce_lock);
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map)
		goto unlock;

	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
	domain->bounce_map = 0;
unlock:
	spin_unlock(&domain->iotlb_lock);
}

static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int ret = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map)
		goto unlock;

	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
				    0, VHOST_MAP_RW, domain->file, 0);
	if (ret)
		goto unlock;

	domain->bounce_map = 1;
unlock:
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
			unsigned long size, unsigned long limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long iova_pfn;

	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

	return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;

	free_iova_fast(iovad, iova >> shift, iova_len);
}

void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain,
					 dma_addr_t dma_addr, size_t size,
					 enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE);
	read_unlock(&domain->bounce_lock);
}

void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain,
				      dma_addr_t dma_addr, size_t size,
				      enum dma_data_direction dir)
{
	read_lock(&domain->bounce_lock);
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
	read_unlock(&domain->bounce_lock);
}

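/*
 * Streaming DMA mappings: an IOVA in [0, bounce_size) is allocated
 * from stream_iovad, the corresponding bounce pages are bound to the
 * caller's physical pages, and (unless DMA_ATTR_SKIP_CPU_SYNC is set)
 * the data is bounced in the direction implied by @dir.
 */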
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	unsigned long limit = domain->bounce_size - 1;
	phys_addr_t pa = page_to_phys(page) + offset;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto err;

	read_lock(&domain->bounce_lock);
	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
		goto err_unlock;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

	read_unlock(&domain->bounce_lock);

	return iova;
err_unlock:
	read_unlock(&domain->bounce_lock);
err:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;

	read_lock(&domain->bounce_lock);
	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	read_unlock(&domain->bounce_lock);
	vduse_domain_free_iova(iovad, dma_addr, size);
}

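/*
 * Coherent allocations live above the bounce range: the IOVA comes
 * from consistent_iovad, the backing memory is allocated with
 * alloc_pages_exact(), and a matching IOTLB entry is added so the
 * region can be found again at free time and on page faults.
 */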
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	unsigned long limit = domain->iova_limit;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
	void *orig = alloc_pages_exact(size, flag);

	if (!iova || !orig)
		goto err;

	spin_lock(&domain->iotlb_lock);
	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				  virt_to_phys(orig), VHOST_MAP_RW,
				  domain->file, (u64)iova)) {
		spin_unlock(&domain->iotlb_lock);
		goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	*dma_addr = iova;

	return orig;
err:
	*dma_addr = DMA_MAPPING_ERROR;
	if (orig)
		free_pages_exact(orig, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}

void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;
	phys_addr_t pa;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
				      (u64)dma_addr + size - 1);
	if (WARN_ON(!map)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}
	map_file = (struct vdpa_map_file *)map->opaque;
	fput(map_file->file);
	kfree(map_file);
	pa = map->addr;
	vhost_iotlb_map_free(domain->iotlb, map);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(pa), size);
}

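/*
 * Page faults on an mmap() of the domain file are resolved here:
 * offsets below bounce_size are served from the bounce pages, higher
 * offsets from the coherent allocations recorded in the IOTLB.
 */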
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	if (iova < domain->bounce_size)
		page = vduse_domain_get_bounce_page(domain, iova);
	else
		page = vduse_domain_get_coherent_page(domain, iova);

	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vduse_iova_domain *domain = file->private_data;

	vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
	vma->vm_private_data = domain;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	vduse_domain_remove_user_bounce_pages(domain);
	vduse_domain_free_kernel_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	kfree(domain);

	return 0;
}

static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}

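/*
 * A domain covers the IOVA range [0, iova_limit): the first bounce_size
 * bytes are backed by bounce pages for streaming DMA, and the rest is
 * reserved for coherent allocations. A minimal caller-side sketch
 * (illustrative only, under the assumption that the caller tears the
 * domain down with vduse_domain_destroy()):
 *
 *	struct vduse_iova_domain *dom;
 *
 *	dom = vduse_domain_create(iova_limit, bounce_size);
 *	if (!dom)
 *		return -ENOMEM;
 *	...
 *	vduse_domain_destroy(dom);
 */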
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;
	int ret;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;

	domain->file = file;
	rwlock_init(&domain->bounce_lock);
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			PAGE_SIZE, IOVA_START_PFN);
	ret = iova_domain_init_rcaches(&domain->stream_iovad);
	if (ret)
		goto err_iovad_stream;
	init_iova_domain(&domain->consistent_iovad,
			PAGE_SIZE, bounce_pfns);
	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
	if (ret)
		goto err_iovad_consistent;

	return domain;
err_iovad_consistent:
	put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
	fput(file);
err_file:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}

int vduse_domain_init(void)
{
	return iova_cache_get();
}

void vduse_domain_exit(void)
{
	iova_cache_put();
}