// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>
#include <asm/shmparam.h>

#include "memmap.h"
#include "kbuf.h"

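/*
 * Try to back the whole allocation with a single compound page. The pages
 * are then physically contiguous, so no vmap() is needed and only the head
 * page has to be put when freeing.
 */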
static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
                                   size_t size, gfp_t gfp)
{
        struct page *page;
        int i, order;

        order = get_order(size);
        if (order > MAX_PAGE_ORDER)
                return ERR_PTR(-ENOMEM);
        else if (order)
                gfp |= __GFP_COMP;

        page = alloc_pages(gfp, order);
        if (!page)
                return ERR_PTR(-ENOMEM);

        for (i = 0; i < nr_pages; i++)
                pages[i] = page + i;

        return page_address(page);
}

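/*
 * Fallback path: allocate nr_pages individual pages and stitch them together
 * with vmap(). Any pages already allocated are released on failure.
 */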
static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
                                 gfp_t gfp)
{
        void *ret;
        int i;

        for (i = 0; i < nr_pages; i++) {
                pages[i] = alloc_page(gfp);
                if (!pages[i])
                        goto err;
        }

        ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
        if (ret)
                return ret;
err:
        while (i--)
                put_page(pages[i]);
        return ERR_PTR(-ENOMEM);
}

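/*
 * Allocate and map kernel memory for a ring region of @size bytes. The
 * compound-page path is preferred; if that fails, fall back to single-page
 * allocations. On success, *out_pages and *npages describe the backing pages
 * so they can later be mapped into userspace or torn down via io_pages_unmap().
 */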
void *io_pages_map(struct page ***out_pages, unsigned short *npages,
                   size_t size)
{
        gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
        struct page **pages;
        int nr_pages;
        void *ret;

        nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
        if (!pages)
                return ERR_PTR(-ENOMEM);

        ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
        if (!IS_ERR(ret))
                goto done;

        ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
        if (!IS_ERR(ret)) {
done:
                *out_pages = pages;
                *npages = nr_pages;
                return ret;
        }

        kvfree(pages);
        *out_pages = NULL;
        *npages = 0;
        return ret;
}

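/*
 * Undo io_pages_map(): drop the page references when @put_pages is set
 * (head page only for the compound case), vunmap() if a vmap was done, and
 * free the page array.
 */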
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
                    bool put_pages)
{
        bool do_vunmap = false;

        if (!ptr)
                return;

        if (put_pages && *npages) {
                struct page **to_free = *pages;
                int i;

                /*
                 * Only did vmap for the non-compound multiple page case.
                 * For the compound page, we just need to put the head.
                 */
                if (PageCompound(to_free[0]))
                        *npages = 1;
                else if (*npages > 1)
                        do_vunmap = true;
                for (i = 0; i < *npages; i++)
                        put_page(to_free[i]);
        }
        if (do_vunmap)
                vunmap(ptr);
        kvfree(*pages);
        *pages = NULL;
        *npages = 0;
}

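/*
 * Release an array of user pages previously pinned with io_pin_pages().
 */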
void io_pages_free(struct page ***pages, int npages)
{
        struct page **page_array = *pages;

        if (!page_array)
                return;

        unpin_user_pages(page_array, npages);
        kvfree(page_array);
        *pages = NULL;
}

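/*
 * Pin the user pages backing [uaddr, uaddr + len) for long-term use
 * (FOLL_LONGTERM). Returns the pinned page array and stores the page count
 * in *npages, or returns an ERR_PTR on failure; a partial pin is unwound
 * and reported as -EFAULT.
 */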
struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
{
        unsigned long start, end, nr_pages;
        struct page **pages;
        int ret;

        end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        start = uaddr >> PAGE_SHIFT;
        nr_pages = end - start;
        if (WARN_ON_ONCE(!nr_pages))
                return ERR_PTR(-EINVAL);

        pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                return ERR_PTR(-ENOMEM);

        ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
                                  pages);
        /* success, mapped all pages */
        if (ret == nr_pages) {
                *npages = nr_pages;
                return pages;
        }

        /* partial map, or didn't map anything */
        if (ret >= 0) {
                /* if we did partial map, release any pages we did get */
                if (ret)
                        unpin_user_pages(pages, ret);
                ret = -EFAULT;
        }
        kvfree(pages);
        return ERR_PTR(ret);
}

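/*
 * Map user-provided memory into the kernel: the user address must be page
 * aligned; the pages are pinned and then vmap()'ed so the ring code can use
 * a contiguous kernel address (used when the application supplies the ring
 * memory itself, e.g. with IORING_SETUP_NO_MMAP).
 */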
void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
                     unsigned long uaddr, size_t size)
{
        struct page **page_array;
        unsigned int nr_pages;
        void *page_addr;

        *npages = 0;

        if (uaddr & (PAGE_SIZE - 1) || !size)
                return ERR_PTR(-EINVAL);

        nr_pages = 0;
        page_array = io_pin_pages(uaddr, size, &nr_pages);
        if (IS_ERR(page_array))
                return page_array;

        page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
        if (page_addr) {
                *pages = page_array;
                *npages = nr_pages;
                return page_addr;
        }

        io_pages_free(&page_array, nr_pages);
        return ERR_PTR(-ENOMEM);
}

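/*
 * Translate an mmap() offset into the kernel address backing that region.
 * The high bits of the offset select the region (IORING_OFF_SQ_RING,
 * IORING_OFF_CQ_RING, IORING_OFF_SQES or IORING_OFF_PBUF_RING); for provided
 * buffer rings the buffer group ID is encoded in the low bits, i.e. userspace
 * mmaps offset IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT).
 */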
static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
                                            size_t sz)
{
        struct io_ring_ctx *ctx = file->private_data;
        loff_t offset = pgoff << PAGE_SHIFT;

        switch ((pgoff << PAGE_SHIFT) & IORING_OFF_MMAP_MASK) {
        case IORING_OFF_SQ_RING:
        case IORING_OFF_CQ_RING:
                /* Don't allow mmap if the ring was setup without it */
                if (ctx->flags & IORING_SETUP_NO_MMAP)
                        return ERR_PTR(-EINVAL);
                return ctx->rings;
        case IORING_OFF_SQES:
                /* Don't allow mmap if the ring was setup without it */
                if (ctx->flags & IORING_SETUP_NO_MMAP)
                        return ERR_PTR(-EINVAL);
                return ctx->sq_sqes;
        case IORING_OFF_PBUF_RING: {
                struct io_buffer_list *bl;
                unsigned int bgid;
                void *ptr;

                bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
                bl = io_pbuf_get_bl(ctx, bgid);
                if (IS_ERR(bl))
                        return bl;
                ptr = bl->buf_ring;
                io_put_bl(ctx, bl);
                return ptr;
        }
        }

        return ERR_PTR(-EINVAL);
}

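/*
 * Insert the kernel-allocated ring pages into the user VMA. VM_DONTEXPAND
 * prevents the mapping from being grown beyond the pages inserted here.
 */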
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
                        struct page **pages, int npages)
{
        unsigned long nr_pages = npages;

        vm_flags_set(vma, VM_DONTEXPAND);
        return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
}

#ifdef CONFIG_MMU

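/*
 * ->mmap() handler for the io_uring fd: validate the requested offset and
 * size, then map the corresponding ring, SQE array or provided buffer ring
 * pages into the VMA.
 */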
__cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct io_ring_ctx *ctx = file->private_data;
        size_t sz = vma->vm_end - vma->vm_start;
        long offset = vma->vm_pgoff << PAGE_SHIFT;
        unsigned int npages;
        void *ptr;

        ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
        if (IS_ERR(ptr))
                return PTR_ERR(ptr);

        switch (offset & IORING_OFF_MMAP_MASK) {
        case IORING_OFF_SQ_RING:
        case IORING_OFF_CQ_RING:
                npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
                return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
        case IORING_OFF_SQES:
                return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
                                           ctx->n_sqe_pages);
        case IORING_OFF_PBUF_RING:
                return io_pbuf_mmap(file, vma);
        }

        return -EINVAL;
}

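/*
 * ->get_unmapped_area() handler: pick a mapping address for the ring
 * regions, honouring cache-colouring constraints (SHM_COLOUR) on
 * architectures that require the kernel and user mappings to alias cleanly.
 */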
unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr,
                                         unsigned long len, unsigned long pgoff,
                                         unsigned long flags)
{
        void *ptr;

        /*
         * Do not allow to map to user-provided address to avoid breaking the
         * aliasing rules. Userspace is not able to guess the offset address of
         * kernel kmalloc()ed memory area.
         */
        if (addr)
                return -EINVAL;

        ptr = io_uring_validate_mmap_request(filp, pgoff, len);
        if (IS_ERR(ptr))
                return -ENOMEM;

        /*
         * Some architectures have strong cache aliasing requirements.
         * For such architectures we need a coherent mapping which aliases
         * kernel memory *and* userspace memory. To achieve that:
         * - use a NULL file pointer to reference physical memory, and
         * - use the kernel virtual address of the shared io_uring context
         *   (instead of the userspace-provided address, which has to be 0UL
         *   anyway).
         * - use the same pgoff which the get_unmapped_area() uses to
         *   calculate the page colouring.
         * For architectures without such aliasing requirements, the
         * architecture will return any suitable mapping because addr is 0.
         */
        filp = NULL;
        flags |= MAP_SHARED;
        pgoff = 0;      /* has been translated to ptr above */
#ifdef SHM_COLOUR
        addr = (uintptr_t) ptr;
        pgoff = addr >> PAGE_SHIFT;
#else
        addr = 0UL;
#endif
        return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
}

#else /* !CONFIG_MMU */

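/*
 * On nommu, the rings are accessed through a direct shared mapping, so
 * ->mmap() only has to check that the requested mapping is a shared one.
 */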
int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
        return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL;
}

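/*
 * Tell the nommu mmap code that direct, read and write mappings of the
 * io_uring fd are all allowed.
 */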
unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
{
        return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
}

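/*
 * On nommu, the "unmapped area" is simply the kernel address of the
 * requested region, as returned by io_uring_validate_mmap_request().
 */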
unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
                                         unsigned long len, unsigned long pgoff,
                                         unsigned long flags)
{
        void *ptr;

        ptr = io_uring_validate_mmap_request(file, pgoff, len);
        if (IS_ERR(ptr))
                return PTR_ERR(ptr);

        return (unsigned long) ptr;
}

#endif /* !CONFIG_MMU */