// SPDX-License-Identifier: GPL-2.0
#include <linux/cred.h>
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/memfd.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>

static int list_limit = 1024;
module_param(list_limit, int, 0644);
MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024.");

static int size_limit_mb = 64;
module_param(size_limit_mb, int, 0644);
MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is 64.");

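/*
 * Per-buffer bookkeeping: one folio pointer and one intra-folio offset per
 * PAGE_SIZE page of the exported buffer, plus the list of folios pinned at
 * creation time (kept only so they can be unpinned on release).
 */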
struct udmabuf {
        pgoff_t pagecount;
        struct folio **folios;

        /**
         * Unlike folios, pinned_folios is only used for unpin.
         * So nr_pinned is not the same as pagecount: pinned_folios
         * only records each folio that was pinned by udmabuf_create.
         * Note that, since a folio may be pinned multiple times, each folio
         * can be added to pinned_folios multiple times, depending on how
         * many times the folio was pinned during creation.
         */
        pgoff_t nr_pinned;
        struct folio **pinned_folios;

        struct sg_table *sg;
        struct miscdevice *device;
        pgoff_t *offsets;
};

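/*
 * Fault handler for mmap'ed udmabufs: insert the pfn that backs the faulting
 * page, then opportunistically pre-fault the remaining pages of the VMA.
 */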
static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct udmabuf *ubuf = vma->vm_private_data;
        pgoff_t pgoff = vmf->pgoff;
        unsigned long addr, pfn;
        vm_fault_t ret;

        if (pgoff >= ubuf->pagecount)
                return VM_FAULT_SIGBUS;

        pfn = folio_pfn(ubuf->folios[pgoff]);
        pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;

        ret = vmf_insert_pfn(vma, vmf->address, pfn);
        if (ret & VM_FAULT_ERROR)
                return ret;

        /* pre fault */
        pgoff = vma->vm_pgoff;
        addr = vma->vm_start;

        for (; addr < vma->vm_end; pgoff++, addr += PAGE_SIZE) {
                if (addr == vmf->address)
                        continue;

                if (WARN_ON(pgoff >= ubuf->pagecount))
                        break;

                pfn = folio_pfn(ubuf->folios[pgoff]);
                pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;

                /**
                 * If the below vmf_insert_pfn() fails, we do not return an
                 * error here during this pre-fault step. However, an error
                 * will be returned if the failure occurs when the addr is
                 * truly accessed.
                 */
                if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
                        break;
        }

        return ret;
}

static const struct vm_operations_struct udmabuf_vm_ops = {
        .fault = udmabuf_vm_fault,
};

static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
{
        struct udmabuf *ubuf = buf->priv;

        if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
                return -EINVAL;

        vma->vm_ops = &udmabuf_vm_ops;
        vma->vm_private_data = ubuf;
        vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
        return 0;
}

static int vmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
        struct udmabuf *ubuf = buf->priv;
        unsigned long *pfns;
        void *vaddr;
        pgoff_t pg;

        dma_resv_assert_held(buf->resv);

        /**
         * HVO may free tail pages, so just use pfn to map each folio
         * into the vmalloc area.
         */
        pfns = kvmalloc_array(ubuf->pagecount, sizeof(*pfns), GFP_KERNEL);
        if (!pfns)
                return -ENOMEM;

        for (pg = 0; pg < ubuf->pagecount; pg++) {
                unsigned long pfn = folio_pfn(ubuf->folios[pg]);

                pfn += ubuf->offsets[pg] >> PAGE_SHIFT;
                pfns[pg] = pfn;
        }

        vaddr = vmap_pfn(pfns, ubuf->pagecount, PAGE_KERNEL);
        kvfree(pfns);
        if (!vaddr)
                return -EINVAL;

        iosys_map_set_vaddr(map, vaddr);
        return 0;
}

static void vunmap_udmabuf(struct dma_buf *buf, struct iosys_map *map)
{
        struct udmabuf *ubuf = buf->priv;

        dma_resv_assert_held(buf->resv);

        vm_unmap_ram(map->vaddr, ubuf->pagecount);
}

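/*
 * Build and DMA-map a scatter/gather table with one PAGE_SIZE entry per page
 * of the buffer, pointing into the pinned folios at the recorded offsets.
 */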
static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
                                     enum dma_data_direction direction)
{
        struct udmabuf *ubuf = buf->priv;
        struct sg_table *sg;
        struct scatterlist *sgl;
        unsigned int i = 0;
        int ret;

        sg = kzalloc(sizeof(*sg), GFP_KERNEL);
        if (!sg)
                return ERR_PTR(-ENOMEM);

        ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
        if (ret < 0)
                goto err_alloc;

        for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
                sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
                             ubuf->offsets[i]);

        ret = dma_map_sgtable(dev, sg, direction, 0);
        if (ret < 0)
                goto err_map;
        return sg;

err_map:
        sg_free_table(sg);
err_alloc:
        kfree(sg);
        return ERR_PTR(ret);
}

static void put_sg_table(struct device *dev, struct sg_table *sg,
                         enum dma_data_direction direction)
{
        dma_unmap_sgtable(dev, sg, direction, 0);
        sg_free_table(sg);
        kfree(sg);
}

static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
                                    enum dma_data_direction direction)
{
        return get_sg_table(at->dev, at->dmabuf, direction);
}

static void unmap_udmabuf(struct dma_buf_attachment *at,
                          struct sg_table *sg,
                          enum dma_data_direction direction)
{
        return put_sg_table(at->dev, sg, direction);
}

static void unpin_all_folios(struct udmabuf *ubuf)
{
        pgoff_t i;

        for (i = 0; i < ubuf->nr_pinned; ++i)
                unpin_folio(ubuf->pinned_folios[i]);

        kvfree(ubuf->pinned_folios);
}

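/*
 * Allocate the per-page lookup tables (folios, offsets) and the pinned-folio
 * list for a buffer spanning @pgcnt pages; deinit_udmabuf() tears them down.
 */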
static __always_inline int init_udmabuf(struct udmabuf *ubuf, pgoff_t pgcnt)
{
        ubuf->folios = kvmalloc_array(pgcnt, sizeof(*ubuf->folios), GFP_KERNEL);
        if (!ubuf->folios)
                return -ENOMEM;

        ubuf->offsets = kvcalloc(pgcnt, sizeof(*ubuf->offsets), GFP_KERNEL);
        if (!ubuf->offsets)
                return -ENOMEM;

        ubuf->pinned_folios = kvmalloc_array(pgcnt,
                                             sizeof(*ubuf->pinned_folios),
                                             GFP_KERNEL);
        if (!ubuf->pinned_folios)
                return -ENOMEM;

        return 0;
}

static __always_inline void deinit_udmabuf(struct udmabuf *ubuf)
{
        unpin_all_folios(ubuf);
        kvfree(ubuf->offsets);
        kvfree(ubuf->folios);
}

static void release_udmabuf(struct dma_buf *buf)
{
        struct udmabuf *ubuf = buf->priv;
        struct device *dev = ubuf->device->this_device;

        if (ubuf->sg)
                put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);

        deinit_udmabuf(ubuf);
        kfree(ubuf);
}

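/*
 * CPU access: lazily create the driver-owned sg table on first access and
 * sync it for the CPU; end_cpu_udmabuf() syncs it back for the device.
 */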
static int begin_cpu_udmabuf(struct dma_buf *buf,
                             enum dma_data_direction direction)
{
        struct udmabuf *ubuf = buf->priv;
        struct device *dev = ubuf->device->this_device;
        int ret = 0;

        if (!ubuf->sg) {
                ubuf->sg = get_sg_table(dev, buf, direction);
                if (IS_ERR(ubuf->sg)) {
                        ret = PTR_ERR(ubuf->sg);
                        ubuf->sg = NULL;
                }
        } else {
                dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
                                    direction);
        }

        return ret;
}

static int end_cpu_udmabuf(struct dma_buf *buf,
                           enum dma_data_direction direction)
{
        struct udmabuf *ubuf = buf->priv;
        struct device *dev = ubuf->device->this_device;

        if (!ubuf->sg)
                return -EINVAL;

        dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction);
        return 0;
}

static const struct dma_buf_ops udmabuf_ops = {
        .cache_sgt_mapping = true,
        .map_dma_buf       = map_udmabuf,
        .unmap_dma_buf     = unmap_udmabuf,
        .release           = release_udmabuf,
        .mmap              = mmap_udmabuf,
        .vmap              = vmap_udmabuf,
        .vunmap            = vunmap_udmabuf,
        .begin_cpu_access  = begin_cpu_udmabuf,
        .end_cpu_access    = end_cpu_udmabuf,
};

#define SEALS_WANTED (F_SEAL_SHRINK)
#define SEALS_DENIED (F_SEAL_WRITE)

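/*
 * A memfd is only acceptable if it cannot shrink underneath us
 * (F_SEAL_SHRINK set) and is not write-sealed (F_SEAL_WRITE clear).
 */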
static int check_memfd_seals(struct file *memfd)
{
        int seals;

        if (!shmem_file(memfd) && !is_file_hugepages(memfd))
                return -EBADFD;

        seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
        if (seals == -EINVAL)
                return -EBADFD;

        if ((seals & SEALS_WANTED) != SEALS_WANTED ||
            (seals & SEALS_DENIED) != 0)
                return -EINVAL;

        return 0;
}

static int export_udmabuf(struct udmabuf *ubuf,
                          struct miscdevice *device,
                          int flags)
{
        DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
        struct dma_buf *buf;

        ubuf->device = device;
        exp_info.ops  = &udmabuf_ops;
        exp_info.size = ubuf->pagecount << PAGE_SHIFT;
        exp_info.priv = ubuf;
        exp_info.flags = O_RDWR;

        buf = dma_buf_export(&exp_info);
        if (IS_ERR(buf))
                return PTR_ERR(buf);

        return dma_buf_fd(buf, flags);
}

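/*
 * Pin the folios backing [start, start + size) of @memfd and record, for
 * every PAGE_SIZE page in that range, which folio backs it and at which
 * offset inside that folio.
 */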
static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd,
                               loff_t start, loff_t size, struct folio **folios)
{
        pgoff_t nr_pinned = ubuf->nr_pinned;
        pgoff_t upgcnt = ubuf->pagecount;
        u32 cur_folio, cur_pgcnt;
        pgoff_t pgoff, pgcnt;
        long nr_folios;
        loff_t end;

        pgcnt = size >> PAGE_SHIFT;
        end = start + (pgcnt << PAGE_SHIFT) - 1;
        nr_folios = memfd_pin_folios(memfd, start, end, folios, pgcnt, &pgoff);
        if (nr_folios <= 0)
                return nr_folios ? nr_folios : -EINVAL;

        cur_pgcnt = 0;
        for (cur_folio = 0; cur_folio < nr_folios; ++cur_folio) {
                pgoff_t subpgoff = pgoff;
                size_t fsize = folio_size(folios[cur_folio]);

                ubuf->pinned_folios[nr_pinned++] = folios[cur_folio];

                for (; subpgoff < fsize; subpgoff += PAGE_SIZE) {
                        ubuf->folios[upgcnt] = folios[cur_folio];
                        ubuf->offsets[upgcnt] = subpgoff;
                        ++upgcnt;

                        if (++cur_pgcnt >= pgcnt)
                                goto end;
                }

                /**
                 * In a given range, only the first subpage of the first folio
                 * has an offset, that is returned by memfd_pin_folios().
                 * The first subpages of other folios (in the range) have an
                 * offset of 0.
                 */
                pgoff = 0;
        }
end:
        ubuf->pagecount = upgcnt;
        ubuf->nr_pinned = nr_pinned;
        return 0;
}

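/*
 * Validate every requested range (page-aligned, total size within
 * size_limit_mb), pin the folios of each memfd range and export the result
 * as a dma-buf fd.
 */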
static long udmabuf_create(struct miscdevice *device,
                           struct udmabuf_create_list *head,
                           struct udmabuf_create_item *list)
{
        unsigned long max_nr_folios = 0;
        struct folio **folios = NULL;
        pgoff_t pgcnt = 0, pglimit;
        struct udmabuf *ubuf;
        long ret = -EINVAL;
        u32 i, flags;

        ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
        if (!ubuf)
                return -ENOMEM;

        pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
        for (i = 0; i < head->count; i++) {
                pgoff_t subpgcnt;

                if (!PAGE_ALIGNED(list[i].offset))
                        goto err_noinit;
                if (!PAGE_ALIGNED(list[i].size))
                        goto err_noinit;

                subpgcnt = list[i].size >> PAGE_SHIFT;
                pgcnt += subpgcnt;
                if (pgcnt > pglimit)
                        goto err_noinit;

                max_nr_folios = max_t(unsigned long, subpgcnt, max_nr_folios);
        }

        if (!pgcnt)
                goto err_noinit;

        ret = init_udmabuf(ubuf, pgcnt);
        if (ret)
                goto err;

        folios = kvmalloc_array(max_nr_folios, sizeof(*folios), GFP_KERNEL);
        if (!folios) {
                ret = -ENOMEM;
                goto err;
        }

        for (i = 0; i < head->count; i++) {
                struct file *memfd = fget(list[i].memfd);

                if (!memfd) {
                        ret = -EBADFD;
                        goto err;
                }

                ret = check_memfd_seals(memfd);
                if (ret < 0) {
                        fput(memfd);
                        goto err;
                }

                ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
                                         list[i].size, folios);
                fput(memfd);
                if (ret)
                        goto err;
        }

        flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
        ret = export_udmabuf(ubuf, device, flags);
        if (ret < 0)
                goto err;

        kvfree(folios);
        return ret;

err:
        deinit_udmabuf(ubuf);
err_noinit:
        kfree(ubuf);
        kvfree(folios);
        return ret;
}

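/*
 * Illustrative userspace sketch (not part of this driver, error handling
 * omitted): create a sealed memfd and turn it into a dma-buf fd with the
 * UDMABUF_CREATE ioctl from <linux/udmabuf.h>.
 *
 *      #define _GNU_SOURCE
 *      #include <fcntl.h>
 *      #include <unistd.h>
 *      #include <sys/ioctl.h>
 *      #include <sys/mman.h>
 *      #include <linux/udmabuf.h>
 *
 *      int memfd = memfd_create("buf", MFD_ALLOW_SEALING);
 *      ftruncate(memfd, getpagesize());
 *      fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
 *
 *      int devfd = open("/dev/udmabuf", O_RDWR);
 *      struct udmabuf_create create = {
 *              .memfd  = memfd,
 *              .flags  = UDMABUF_FLAGS_CLOEXEC,
 *              .offset = 0,
 *              .size   = getpagesize(),
 *      };
 *      int dmabuf_fd = ioctl(devfd, UDMABUF_CREATE, &create);
 */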
static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
{
        struct udmabuf_create create;
        struct udmabuf_create_list head;
        struct udmabuf_create_item list;

        if (copy_from_user(&create, (void __user *)arg,
                           sizeof(create)))
                return -EFAULT;

        head.flags  = create.flags;
        head.count  = 1;
        list.memfd  = create.memfd;
        list.offset = create.offset;
        list.size   = create.size;

        return udmabuf_create(filp->private_data, &head, &list);
}

static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
{
        struct udmabuf_create_list head;
        struct udmabuf_create_item *list;
        int ret = -EINVAL;
        u32 lsize;

        if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
                return -EFAULT;
        if (head.count > list_limit)
                return -EINVAL;
        lsize = sizeof(struct udmabuf_create_item) * head.count;
        list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
        if (IS_ERR(list))
                return PTR_ERR(list);

        ret = udmabuf_create(filp->private_data, &head, list);
        kfree(list);
        return ret;
}

static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
                          unsigned long arg)
{
        long ret;

        switch (ioctl) {
        case UDMABUF_CREATE:
                ret = udmabuf_ioctl_create(filp, arg);
                break;
        case UDMABUF_CREATE_LIST:
                ret = udmabuf_ioctl_create_list(filp, arg);
                break;
        default:
                ret = -ENOTTY;
                break;
        }
        return ret;
}

static const struct file_operations udmabuf_fops = {
        .owner          = THIS_MODULE,
        .unlocked_ioctl = udmabuf_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = udmabuf_ioctl,
#endif
};

static struct miscdevice udmabuf_misc = {
        .minor          = MISC_DYNAMIC_MINOR,
        .name           = "udmabuf",
        .fops           = &udmabuf_fops,
};

static int __init udmabuf_dev_init(void)
{
        int ret;

        ret = misc_register(&udmabuf_misc);
        if (ret < 0) {
                pr_err("Could not initialize udmabuf device\n");
                return ret;
        }

        ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
                                           DMA_BIT_MASK(64));
        if (ret < 0) {
                pr_err("Could not setup DMA mask for udmabuf device\n");
                misc_deregister(&udmabuf_misc);
                return ret;
        }

        return 0;
}

static void __exit udmabuf_dev_exit(void)
{
        misc_deregister(&udmabuf_misc);
}

module_init(udmabuf_dev_init)
module_exit(udmabuf_dev_exit)

MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");