// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/*
 * STag lookup is based on its index part only (24 bits).
 * The code avoids the special STag of zero and tries to randomize
 * STag values between 1 and SIW_STAG_MAX_INDEX.
 */
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	u32 id, next;

	get_random_bytes(&next, 4);
	next &= 0x00ffffff;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
			    GFP_KERNEL) < 0)
		return -ENOMEM;

	/* Set the STag index part */
	m->stag = id << 8;

	siw_dbg_mem(m, "new MEM object\n");

	return 0;
}

/*
 * siw_mem_id2obj()
 *
 * Resolves memory from STag given by its index. Might be called from:
 * o process context before sending out of sgl, or
 * o in softirq when resolving target memory
 */
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
	struct siw_mem *mem;

	rcu_read_lock();
	mem = xa_load(&sdev->mem_xa, stag_index);
	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
		rcu_read_unlock();
		return mem;
	}
	rcu_read_unlock();

	return NULL;
}
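
/*
 * The lookup above pairs xa_load() under rcu_read_lock() with
 * kref_get_unless_zero(): a reference is only taken if the object's
 * refcount has not yet dropped to zero, so an object that is
 * concurrently being torn down is never handed out again.
 */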

static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
			   bool dirty)
{
	unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
}

void siw_umem_release(struct siw_umem *umem, bool dirty)
{
	struct mm_struct *mm_s = umem->owning_mm;
	int i, num_pages = umem->num_pages;

	for (i = 0; num_pages; i++) {
		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);

		siw_free_plist(&umem->page_chunk[i], to_free,
			       umem->writable && dirty);
		kfree(umem->page_chunk[i].plist);
		num_pages -= to_free;
	}
	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);

	mmdrop(mm_s);
	kfree(umem->page_chunk);
	kfree(umem);
}
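
/*
 * siw_umem_release() undoes the accounting of siw_umem_get(): pages are
 * unpinned chunk by chunk, the full page count is subtracted from
 * mm->pinned_vm, and the mm reference taken with mmgrab() is dropped
 * again via mmdrop().
 */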

int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
		   u64 start, u64 len, int rights)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
	u32 id, next;

	if (!mem)
		return -ENOMEM;

	mem->mem_obj = mem_obj;
	mem->stag_valid = 0;
	mem->sdev = sdev;
	mem->va = start;
	mem->len = len;
	mem->pd = pd;
	mem->perms = rights & IWARP_ACCESS_MASK;
	kref_init(&mem->ref);

	mr->mem = mem;

	get_random_bytes(&next, 4);
	next &= 0x00ffffff;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
			    GFP_KERNEL) < 0) {
		kfree(mem);
		return -ENOMEM;
	}
	/* Set the STag index part */
	mem->stag = id << 8;

	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

	return 0;
}
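
/*
 * siw_mr_add_mem() publishes the same STag value as both lkey and rkey
 * of the base ib_mr; access is restricted separately through
 * mem->perms, which holds the registration rights masked to
 * IWARP_ACCESS_MASK.
 */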

void siw_mr_drop_mem(struct siw_mr *mr)
{
	struct siw_mem *mem = mr->mem, *found;

	mem->stag_valid = 0;

	/* make STag invalid visible asap */
	smp_mb();

	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
	WARN_ON(found != mem);
	siw_mem_put(mem);
}

void siw_free_mem(struct kref *ref)
{
	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

	if (!mem->is_mw && mem->mem_obj) {
		if (mem->is_pbl == 0)
			siw_umem_release(mem->umem, true);
		else
			kfree(mem->pbl);
	}
	kfree(mem);
}

/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for memory object.
 *
 * @pd:		Protection Domain memory should belong to
 * @mem:	memory to be checked
 * @addr:	starting addr of mem
 * @perms:	requested access permissions
 * @len:	len of memory interval to be checked
 */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
		  enum ib_access_flags perms, int len)
{
	if (!mem->stag_valid) {
		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
		return -E_STAG_INVALID;
	}
	if (mem->pd != pd) {
		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
		return -E_PD_MISMATCH;
	}
	/*
	 * check access permissions
	 */
	if ((mem->perms & perms) < perms) {
		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
			   mem->perms, perms);
		return -E_ACCESS_PERM;
	}
	/*
	 * Check if access falls into valid memory interval.
	 */
	if (addr < mem->va || addr + len > mem->va + mem->len) {
		siw_dbg_pd(pd, "MEM interval len %d\n", len);
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
			   (void *)(uintptr_t)addr,
			   (void *)(uintptr_t)(addr + len));
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
			   (void *)(uintptr_t)mem->va,
			   (void *)(uintptr_t)(mem->va + mem->len),
			   mem->stag);

		return -E_BASE_BOUNDS;
	}
	return E_ACCESS_OK;
}
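
/*
 * The permission test above relies on ib_access_flags being a bitmask:
 * masking can only clear bits, so '(mem->perms & perms) < perms' is
 * true exactly when at least one requested access bit is missing from
 * the registration rights.
 */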

/*
 * siw_check_sge()
 *
 * Check SGE for access rights in given interval
 *
 * @pd:		Protection Domain memory should belong to
 * @sge:	SGE to be checked
 * @mem:	location of memory reference within array
 * @perms:	requested access permissions
 * @off:	starting offset in SGE
 * @len:	len of memory interval to be checked
 *
 * NOTE: Function references SGE's memory object (mem->obj)
 * if not yet done. New reference is kept if check went ok and
 * released if check failed. If mem->obj is already valid, no new
 * lookup is being done and mem is not released if the check fails.
 */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
		  enum ib_access_flags perms, u32 off, int len)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *new = NULL;
	int rv = E_ACCESS_OK;

	if (len + off > sge->length) {
		rv = -E_BASE_BOUNDS;
		goto fail;
	}
	if (*mem == NULL) {
		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
		if (unlikely(!new)) {
			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
			rv = -E_STAG_INVALID;
			goto fail;
		}
		*mem = new;
	}
	/* Check if user re-registered with different STag key */
	if (unlikely((*mem)->stag != sge->lkey)) {
		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
		rv = -E_STAG_INVALID;
		goto fail;
	}
	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
	if (unlikely(rv))
		goto fail;

	return 0;

fail:
	if (new) {
		*mem = NULL;
		siw_mem_put(new);
	}
	return rv;
}
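
/*
 * On any failure after a fresh lookup, the 'fail' path above drops the
 * newly taken reference and resets *mem, so the caller's SGE memory
 * array never keeps a reference obtained during a failed check.
 */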

void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
	switch (op) {
	case SIW_OP_SEND:
	case SIW_OP_WRITE:
	case SIW_OP_SEND_WITH_IMM:
	case SIW_OP_SEND_REMOTE_INV:
	case SIW_OP_READ:
	case SIW_OP_READ_LOCAL_INV:
		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
		break;

	case SIW_OP_RECEIVE:
		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
		break;

	case SIW_OP_READ_RESPONSE:
		siw_unref_mem_sgl(wqe->mem, 1);
		break;

	default:
		/*
		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
		 * do not hold memory references
		 */
		break;
	}
}

int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
	int rv = 0;

	if (unlikely(!mem)) {
		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
		return -EINVAL;
	}
	if (unlikely(mem->pd != pd)) {
		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
		rv = -EACCES;
		goto out;
	}
	/*
	 * Per RDMA verbs definition, an STag may already be in invalid
	 * state if invalidation is requested. So no state check here.
	 */
	mem->stag_valid = 0;

	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
	siw_mem_put(mem);
	return rv;
}
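
/*
 * Unlike siw_mr_drop_mem(), STag invalidation only clears stag_valid;
 * the memory object stays registered in mem_xa and keeps its
 * references, so the STag index remains resolvable and the STag can
 * potentially be made valid again later.
 */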

/*
 * siw_pbl_get_buffer()
 *
 * Gets physical address backed by PBL element. Address is referenced
 * by linear byte offset into list of variably sized PB elements.
 * Optionally, provides remaining len within current element, and
 * current PBL index for later resume at same element.
 */
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
	int i = idx ? *idx : 0;

	while (i < pbl->num_buf) {
		struct siw_pble *pble = &pbl->pbe[i];

		if (pble->pbl_off + pble->size > off) {
			u64 pble_off = off - pble->pbl_off;

			if (len)
				*len = pble->size - pble_off;
			if (idx)
				*idx = i;

			return pble->addr + pble_off;
		}
		i++;
	}
	if (len)
		*len = 0;
	return 0;
}

struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
	struct siw_pbl *pbl;

	if (num_buf == 0)
		return ERR_PTR(-EINVAL);

	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
	if (!pbl)
		return ERR_PTR(-ENOMEM);

	pbl->max_buf = num_buf;

	return pbl;
}

struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
	struct siw_umem *umem;
	struct mm_struct *mm_s;
	u64 first_page_va;
	unsigned long mlock_limit;
	unsigned int foll_flags = FOLL_WRITE;
	int num_pages, num_chunks, i, rv = 0;

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	if (!len)
		return ERR_PTR(-EINVAL);

	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	mm_s = current->mm;
	umem->owning_mm = mm_s;
	umem->writable = writable;

	mmgrab(mm_s);

	if (!writable)
		foll_flags |= FOLL_FORCE;

	mmap_read_lock(mm_s);

	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	umem->fp_addr = first_page_va;

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto out_sem_up;
	}
	for (i = 0; num_pages; i++) {
		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);

		umem->page_chunk[i].plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
		if (!umem->page_chunk[i].plist) {
			rv = -ENOMEM;
			goto out_sem_up;
		}
		got = 0;
		while (nents) {
			struct page **plist = &umem->page_chunk[i].plist[got];

			rv = pin_user_pages(first_page_va, nents,
					    foll_flags | FOLL_LONGTERM,
					    plist, NULL);
			if (rv < 0)
				goto out_sem_up;

			umem->num_pages += rv;
			atomic64_add(rv, &mm_s->pinned_vm);
			first_page_va += rv * PAGE_SIZE;
			nents -= rv;
			got += rv;
		}
		num_pages -= got;
	}
out_sem_up:
	mmap_read_unlock(mm_s);

	if (rv > 0)
		return umem;

	siw_umem_release(umem, false);

	return ERR_PTR(rv);
}
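
/*
 * siw_umem_get() pins user memory in batches of at most PAGES_PER_CHUNK
 * pages and charges each successfully pinned batch against
 * mm->pinned_vm, which was checked against RLIMIT_MEMLOCK up front.
 * On error the partially built umem is torn down through
 * siw_umem_release(umem, false), which also reverses the accounting.
 */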