/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
static u32 hw_index_to_key(unsigned long ind)
{
	return (u32)(ind >> 24) | (ind << 8);
}

unsigned long key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}
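/*
 * For illustration: hw_index_to_key(1) yields the key 0x100, and
 * key_to_hw_index(0x100) gives back 1; the two byte rotations are inverses
 * for any index that fits in the low 24 bits.
 */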
static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
				  struct hns_roce_cmd_mailbox *mailbox,
				  unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
				 HNS_ROCE_CMD_CREATE_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
}

int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
			    struct hns_roce_cmd_mailbox *mailbox,
			    unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
				 mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
}
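/*
 * Note on the destroy path: callers that only tear down an MPT (such as
 * hns_roce_mr_free() and hns_roce_mw_free() below) pass a NULL mailbox, in
 * which case the out_param is 0 and the op_modifier is set to !mailbox, so
 * the command is issued without an output mailbox attached.
 */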
static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
			u32 pd, u64 iova, u64 size, u32 access)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	unsigned long obj = 0;
	int err;

	/* Allocate a key for mr from mr_table */
	err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj);
	if (err) {
		ibdev_err(ibdev,
			  "failed to alloc bitmap for MR key, ret = %d.\n",
			  err);
		return -ENOMEM;
	}

	mr->iova = iova;			/* MR va starting addr */
	mr->size = size;			/* MR addr range */
	mr->pd = pd;				/* MR num */
	mr->access = access;			/* MR access permit */
	mr->enabled = 0;			/* MR active status */
	mr->key = hw_index_to_key(obj);		/* MR key */

	err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
	if (err) {
		ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
		goto err_free_bitmap;
	}

	return 0;

err_free_bitmap:
	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
	return err;
}

static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	unsigned long obj = key_to_hw_index(mr->key);

	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
}
static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
			size_t length, struct ib_udata *udata, u64 start,
			unsigned int access)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	bool is_fast = mr->type == MR_TYPE_FRMR;
	struct hns_roce_buf_attr buf_attr = {};
	int err;

	mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
	buf_attr.page_shift = is_fast ? PAGE_SHIFT :
			      hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
	buf_attr.region[0].size = length;
	buf_attr.region[0].hopnum = mr->pbl_hop_num;
	buf_attr.region_count = 1;
	buf_attr.fixed_page = true;
	buf_attr.user_access = access;
	/* fast MR's buffer is allocated before mapping, not at creation */
	buf_attr.mtt_only = is_fast;

	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
				  hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT,
				  udata, start);
	if (err)
		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
	else
		mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;

	return err;
}

static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
}
static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mr *mr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int ret;

	if (mr->enabled) {
		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
					      key_to_hw_index(mr->key) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
				   ret);
	}

	free_mr_pbl(hr_dev, mr);
	free_mr_key(hr_dev, mr);
}

static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mr *mr)
{
	unsigned long mtpt_idx = key_to_hw_index(mr->key);
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	int ret;

	/* Allocate mailbox memory */
	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		return ret;
	}

	if (mr->type != MR_TYPE_FRMR)
		ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr,
					     mtpt_idx);
	else
		ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
	if (ret) {
		dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
		goto err_page;
	}

	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
		goto err_page;
	}

	mr->enabled = 1;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ret;
}
int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	int ret;

	ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
				   hr_dev->caps.num_mtpts,
				   hr_dev->caps.num_mtpts - 1,
				   hr_dev->caps.reserved_mrws, 0);
	return ret;
}

void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
}
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_DMA;

	/* Allocate memory region key */
	hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
	ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc);
	if (ret)
		goto err_free;

	ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
	if (ret)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

	return &mr->ibmr;

err_mr:
	free_mr_key(hr_dev, mr);

err_free:
	kfree(mr);
	return ERR_PTR(ret);
}
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				   u64 virt_addr, int access_flags,
				   struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_MR;
	ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length,
			   access_flags);
	if (ret)
		goto err_alloc_mr;

	ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags);
	if (ret)
		goto err_alloc_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_alloc_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->ibmr.length = length;

	return &mr->ibmr;

err_alloc_pbl:
	free_mr_pbl(hr_dev, mr);
err_alloc_key:
	free_mr_key(hr_dev, mr);
err_alloc_mr:
	kfree(mr);
	return ERR_PTR(ret);
}
static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
			  u64 start, u64 length,
			  u64 virt_addr, int mr_access_flags,
			  struct hns_roce_cmd_mailbox *mailbox,
			  u32 pdn, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	int ret;

	free_mr_pbl(hr_dev, mr);
	ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags);
	if (ret) {
		ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret);
		return ret;
	}

	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
					   mr_access_flags, virt_addr,
					   length, mailbox->buf);
	if (ret) {
		ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret);
		free_mr_pbl(hr_dev, mr);
	}

	return ret;
}
struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
				     u64 length, u64 virt_addr,
				     int mr_access_flags, struct ib_pd *pd,
				     struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ib_dev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_cmd_mailbox *mailbox;
	unsigned long mtpt_idx;
	u32 pdn = 0;
	int ret;

	if (!mr->enabled)
		return ERR_PTR(-EINVAL);

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return ERR_CAST(mailbox);

	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
				HNS_ROCE_CMD_QUERY_MPT,
				HNS_ROCE_CMD_TIMEOUT_MSECS);
	if (ret)
		goto free_cmd_mbox;

	ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
	if (ret)
		ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);

	mr->enabled = 0;

	if (flags & IB_MR_REREG_PD)
		pdn = to_hr_pd(pd)->pdn;

	if (flags & IB_MR_REREG_TRANS) {
		ret = rereg_mr_trans(ibmr, flags,
				     start, length,
				     virt_addr, mr_access_flags,
				     mailbox, pdn, udata);
		if (ret)
			goto free_cmd_mbox;
	} else {
		ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
						   mr_access_flags, virt_addr,
						   length, mailbox->buf);
		if (ret)
			goto free_cmd_mbox;
	}

	ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
	if (ret) {
		ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
		goto free_cmd_mbox;
	}

	mr->enabled = 1;
	if (flags & IB_MR_REREG_ACCESS)
		mr->access = mr_access_flags;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return NULL;

free_cmd_mbox:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ERR_PTR(ret);
}
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	int ret = 0;

	if (hr_dev->hw->dereg_mr) {
		ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
	} else {
		hns_roce_mr_free(hr_dev, mr);
		kfree(mr);
	}

	return ret;
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct device *dev = hr_dev->dev;
	struct hns_roce_mr *mr;
	u64 length;
	int ret;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
		dev_err(dev, "max_num_sg larger than %d\n",
			HNS_ROCE_FRMR_MAX_PA);
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_FRMR;

	/* Allocate memory region key */
	length = max_num_sg * (1 << PAGE_SHIFT);
	ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0);
	if (ret)
		goto err_free;

	ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0);
	if (ret)
		goto err_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->ibmr.length = length;

	return &mr->ibmr;

err_pbl:
	free_mr_pbl(hr_dev, mr);
err_key:
	free_mr_key(hr_dev, mr);
err_free:
	kfree(mr);
	return ERR_PTR(ret);
}
static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
		mr->page_list[mr->npages++] = addr;
		return 0;
	}

	return -ENOBUFS;
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		       unsigned int *sg_offset)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_mtr *mtr = &mr->pbl_mtr;
	int ret = 0;

	mr->npages = 0;
	mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
				 sizeof(dma_addr_t), GFP_KERNEL);
	if (!mr->page_list)
		return ret;

	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
	if (ret < 1) {
		ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
			  mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
		goto err_page_list;
	}

	mtr->hem_cfg.region[0].offset = 0;
	mtr->hem_cfg.region[0].count = mr->npages;
	mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
	mtr->hem_cfg.region_count = 1;
	ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
	if (ret) {
		ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
		ret = 0;
	} else {
		mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
		ret = mr->npages;
	}

err_page_list:
	kvfree(mr->page_list);
	mr->page_list = NULL;

	return ret;
}
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mw *mw)
{
	struct device *dev = hr_dev->dev;
	int ret;

	if (mw->enabled) {
		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
					      key_to_hw_index(mw->rkey) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);

		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
				   key_to_hw_index(mw->rkey));
	}

	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
			     key_to_hw_index(mw->rkey), BITMAP_NO_RR);
}

static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mw *mw)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
	int ret;

	/* prepare HEM entry memory */
	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	if (ret)
		return ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		goto err_table;
	}

	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
	if (ret) {
		dev_err(dev, "MW write mtpt fail!\n");
		goto err_page;
	}

	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
		goto err_page;
	}

	mw->enabled = 1;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);

	return ret;
}
int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);
	unsigned long index = 0;
	int ret;

	/* Allocate a key for mw from bitmap */
	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
	if (ret)
		return ret;

	mw->rkey = hw_index_to_key(index);

	ibmw->rkey = mw->rkey;
	mw->pdn = to_hr_pd(ibmw->pd)->pdn;
	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

	ret = hns_roce_mw_enable(hr_dev, mw);
	if (ret)
		goto err_mw;

	return 0;

err_mw:
	hns_roce_mw_free(hr_dev, mw);
	return ret;
}

int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);

	hns_roce_mw_free(hr_dev, mw);
	return 0;
}
static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  dma_addr_t *pages, struct hns_roce_buf_region *region)
{
	__le64 *mtts;
	int offset;
	int count;
	int npage;
	u64 addr;
	int end;
	int i;

	/* if hopnum is 0, the buffer cannot store BAs, so skip writing mtt */
	if (!region->hopnum)
		return 0;

	offset = region->offset;
	end = offset + region->count;
	npage = 0;
	while (offset < end) {
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  offset, &count, NULL);
		if (!mtts)
			return -ENOBUFS;

		for (i = 0; i < count; i++) {
			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
				addr = to_hr_hw_page_addr(pages[npage]);
			else
				addr = pages[npage];

			mtts[i] = cpu_to_le64(addr);
			npage++;
		}
		offset += count;
	}

	return 0;
}
static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
{
	int i;

	for (i = 0; i < attr->region_count; i++)
		if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
		    attr->region[i].hopnum > 0)
			return true;

	/* Because the mtr has only one root base address, a hopnum of 0 means
	 * the root base address equals the first buffer address; thus all
	 * allocated memory must be in one continuous space accessed in
	 * direct mode.
	 */
	return false;
}

static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
{
	size_t size = 0;
	int i;

	for (i = 0; i < attr->region_count; i++)
		size += attr->region[i].size;

	return size;
}
/*
 * Check that the given pages lie in one continuous address space.
 * Returns 0 on success, or the index of the first page that breaks the run.
 */
static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
					 unsigned int page_shift)
{
	size_t page_size = 1 << page_shift;
	int i;

	for (i = 1; i < page_count; i++)
		if (pages[i] - pages[i - 1] != page_size)
			return i;

	return 0;
}
static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release user buffers */
	if (mtr->umem) {
		ib_umem_release(mtr->umem);
		mtr->umem = NULL;
	}

	/* release kernel buffers */
	if (mtr->kmem) {
		hns_roce_buf_free(hr_dev, mtr->kmem);
		mtr->kmem = NULL;
	}
}
static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  struct hns_roce_buf_attr *buf_attr, bool is_direct,
			  struct ib_udata *udata, unsigned long user_addr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	unsigned int best_pg_shift;
	int all_pg_count = 0;
	size_t total_size;
	int ret;

	total_size = mtr_bufs_size(buf_attr);
	if (total_size < 1) {
		ibdev_err(ibdev, "failed to check mtr size.\n");
		return -EINVAL;
	}

	if (udata) {
		unsigned long pgsz_bitmap;
		unsigned long page_size;

		mtr->kmem = NULL;
		mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
					buf_attr->user_access);
		if (IS_ERR_OR_NULL(mtr->umem)) {
			ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
				  PTR_ERR(mtr->umem));
			return -ENOMEM;
		}
		if (buf_attr->fixed_page)
			pgsz_bitmap = 1 << buf_attr->page_shift;
		else
			pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);

		page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
						   user_addr);
		if (!page_size) {
			ret = -EINVAL;
			goto err_alloc_mem;
		}
		best_pg_shift = order_base_2(page_size);
		all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
		ret = 0;
	} else {
		mtr->umem = NULL;
		mtr->kmem =
			hns_roce_buf_alloc(hr_dev, total_size,
					   buf_attr->page_shift,
					   is_direct ? HNS_ROCE_BUF_DIRECT : 0);
		if (IS_ERR(mtr->kmem)) {
			ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
				  PTR_ERR(mtr->kmem));
			return PTR_ERR(mtr->kmem);
		}

		best_pg_shift = buf_attr->page_shift;
		all_pg_count = mtr->kmem->npages;
	}

	/* the page shift must not be smaller than the minimum hardware page shift */
	if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) {
		ret = -EINVAL;
		ibdev_err(ibdev,
			  "failed to check mtr, page shift = %u count = %d.\n",
			  best_pg_shift, all_pg_count);
		goto err_alloc_mem;
	}

	mtr->hem_cfg.buf_pg_shift = best_pg_shift;
	mtr->hem_cfg.buf_pg_count = all_pg_count;

	return 0;

err_alloc_mem:
	mtr_free_bufs(hr_dev, mtr);
	return ret;
}
static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			 dma_addr_t *pages, int count, unsigned int page_shift)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int npage;
	int err;

	if (mtr->umem)
		npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0,
					       mtr->umem, page_shift);
	else
		npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0,
					       mtr->kmem);

	if (mtr->hem_cfg.is_direct && npage > 1) {
		err = mtr_check_direct_pages(pages, npage, page_shift);
		if (err) {
			ibdev_err(ibdev, "Failed to check %s direct page-%d\n",
				  mtr->umem ? "user" : "kernel", err);
			npage = err;
		}
	}

	return npage;
}
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		     dma_addr_t *pages, unsigned int page_cnt)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_region *r;
	unsigned int i;
	int err;

	/*
	 * Only use the first page address as the root ba when hopnum is 0,
	 * because the addresses of all pages are consecutive in this case.
	 */
	if (mtr->hem_cfg.is_direct) {
		mtr->hem_cfg.root_ba = pages[0];
		return 0;
	}

	for (i = 0; i < mtr->hem_cfg.region_count; i++) {
		r = &mtr->hem_cfg.region[i];
		if (r->offset + r->count > page_cnt) {
			err = -EINVAL;
			ibdev_err(ibdev,
				  "failed to check mtr%u end %u + %u, max %u.\n",
				  i, r->offset, r->count, page_cnt);
			return err;
		}

		err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
		if (err) {
			ibdev_err(ibdev,
				  "failed to map mtr%u offset %u, ret = %d.\n",
				  i, r->offset, err);
			return err;
		}
	}

	return 0;
}
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		      int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	int start_index;
	int mtt_count;
	int total = 0;
	__le64 *mtts;
	int npage;
	u64 addr;
	int left;

	if (!mtt_buf || mtt_max < 1)
		goto done;

	/* no mtt memory in direct mode, so just return the buffer address */
	if (cfg->is_direct) {
		start_index = offset >> HNS_HW_PAGE_SHIFT;
		for (mtt_count = 0; mtt_count < cfg->region_count &&
		     total < mtt_max; mtt_count++) {
			npage = cfg->region[mtt_count].offset;
			if (npage < start_index)
				continue;

			addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
				mtt_buf[total] = to_hr_hw_page_addr(addr);
			else
				mtt_buf[total] = addr;

			total++;
		}

		goto done;
	}

	start_index = offset >> cfg->buf_pg_shift;
	left = mtt_max;
	while (left > 0) {
		mtt_count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  start_index + total,
						  &mtt_count, NULL);
		if (!mtts || !mtt_count)
			goto done;

		npage = min(mtt_count, left);
		left -= npage;
		for (mtt_count = 0; mtt_count < npage; mtt_count++)
			mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
	}

done:
	if (base_addr)
		*base_addr = cfg->root_ba;

	return total;
}
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
			    struct hns_roce_buf_attr *attr,
			    struct hns_roce_hem_cfg *cfg,
			    unsigned int *buf_page_shift)
{
	struct hns_roce_buf_region *r;
	unsigned int page_shift;
	int page_cnt = 0;
	size_t buf_size;
	int region_cnt;

	if (cfg->is_direct) {
		buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
		page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
		/*
		 * When the HEM buffer uses level-0 addressing, the page size
		 * equals the buffer size, and the page size = 4K * 2^N.
		 */
		cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
		if (attr->region_count > 1) {
			cfg->buf_pg_count = page_cnt;
			page_shift = HNS_HW_PAGE_SHIFT;
		} else {
			cfg->buf_pg_count = 1;
			page_shift = cfg->buf_pg_shift;
			if (buf_size != 1 << page_shift) {
				ibdev_err(&hr_dev->ib_dev,
					  "failed to check direct size %zu shift %d.\n",
					  buf_size, page_shift);
				return -EINVAL;
			}
		}
	} else {
		page_shift = cfg->buf_pg_shift;
	}

	/* convert buffer size to page index and page count */
	for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
	     region_cnt < attr->region_count &&
	     region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
		r = &cfg->region[region_cnt];
		r->offset = page_cnt;
		buf_size = hr_hw_page_align(attr->region[region_cnt].size);
		r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
		page_cnt += r->count;
		r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
					     r->count);
	}

	if (region_cnt < 1) {
		ibdev_err(&hr_dev->ib_dev,
			  "failed to check mtr region count, pages = %d.\n",
			  cfg->buf_pg_count);
		return -ENOBUFS;
	}

	cfg->region_count = region_cnt;
	*buf_page_shift = page_shift;

	return page_cnt;
}
/**
 * hns_roce_mtr_create - Create hns memory translate region.
 *
 * @hr_dev: RoCE device struct pointer
 * @mtr: memory translate region
 * @buf_attr: buffer attribute for creating mtr
 * @ba_page_shift: page shift for multi-hop base address table
 * @udata: user space context; if it's NULL, it means kernel space
 * @user_addr: userspace virtual address to start at
 */
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			struct hns_roce_buf_attr *buf_attr,
			unsigned int ba_page_shift, struct ib_udata *udata,
			unsigned long user_addr)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	unsigned int buf_page_shift = 0;
	dma_addr_t *pages = NULL;
	int all_pg_cnt;
	int get_pg_cnt;
	int ret = 0;

	/* if mtt is disabled, all pages must be in a continuous address range */
	cfg->is_direct = !mtr_has_mtt(buf_attr);

	/* if the buffer only needs mtt, just init the hem cfg */
	if (buf_attr->mtt_only) {
		cfg->buf_pg_shift = buf_attr->page_shift;
		cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
				    buf_attr->page_shift;
		mtr->umem = NULL;
		mtr->kmem = NULL;
	} else {
		ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
				     udata, user_addr);
		if (ret) {
			ibdev_err(ibdev,
				  "failed to alloc mtr bufs, ret = %d.\n", ret);
			return ret;
		}
	}

	all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
	if (all_pg_cnt < 1) {
		ret = -ENOBUFS;
		ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
		goto err_alloc_bufs;
	}

	hns_roce_hem_list_init(&mtr->hem_list);
	if (!cfg->is_direct) {
		ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
						cfg->region, cfg->region_count,
						ba_page_shift);
		if (ret) {
			ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
				  ret);
			goto err_alloc_bufs;
		}
		cfg->root_ba = mtr->hem_list.root_ba;
		cfg->ba_pg_shift = ba_page_shift;
	} else {
		cfg->ba_pg_shift = cfg->buf_pg_shift;
	}

	/* no buffer to map */
	if (buf_attr->mtt_only)
		return 0;

	/* alloc a tmp array to store the buffer's dma addresses */
	pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pages) {
		ret = -ENOMEM;
		ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
			  all_pg_cnt);
		goto err_alloc_hem_list;
	}

	get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
				   buf_page_shift);
	if (get_pg_cnt != all_pg_cnt) {
		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
			  get_pg_cnt, all_pg_cnt);
		ret = -ENOBUFS;
		goto err_alloc_page_list;
	}

	/* write the buffer's dma addresses to the BA table */
	ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
	if (ret) {
		ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
		goto err_alloc_page_list;
	}

	/* drop tmp array */
	kvfree(pages);
	return 0;

err_alloc_page_list:
	kvfree(pages);
err_alloc_hem_list:
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
err_alloc_bufs:
	mtr_free_bufs(hr_dev, mtr);
	return ret;
}
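/*
 * Typical usage within this file: alloc_mr_pbl() fills a hns_roce_buf_attr
 * with a single region (size, hopnum, page_shift, user_access, mtt_only) and
 * then calls hns_roce_mtr_create() with the PBL BA page shift; for kernel
 * MRs udata is NULL, while for user MRs it carries the ib_udata from the
 * registration verb.
 */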
void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release multi-hop addressing resource */
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);

	/* free buffers */
	mtr_free_bufs(hr_dev, mtr);
}