// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"
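
/* IDA used to hand out a unique id to each registered umem. */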
static DEFINE_IDA(umem_ida);

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}
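
/* Drop a reference. When the last reference goes away, the umem is
 * released: from a workqueue if defer_cleanup is set, otherwise directly.
 */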
void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}
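
/* Pin the umem's backing pages in user memory. FOLL_LONGTERM is used
 * because the pages stay pinned for the lifetime of the umem.
 */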
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0]);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			/* Happens if process is killed */
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}
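
/* Charge the pinned pages against the caller's RLIMIT_MEMLOCK;
 * tasks with CAP_IPC_LOCK are not charged.
 */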
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

#define XDP_UMEM_FLAGS_VALID ( \
		XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
		XDP_UMEM_TX_SW_CSUM | \
		XDP_UMEM_TX_METADATA_LEN | \
	0)
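
/* Validate the registration request from user space, then account,
 * pin and vmap the backing pages of the umem.
 */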
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 addr = mr->addr, size = mr->len;
	u32 chunks_rem, npgs_rem;
	u64 chunks, npgs;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_FLAGS_VALID)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
	if (!chunks || chunks > U32_MAX)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	if (mr->flags & XDP_UMEM_TX_METADATA_LEN) {
		if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
			return -EINVAL;
		umem->tx_metadata_len = mr->tx_metadata_len;
	}

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_alloc(&umem_ida, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_free(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}