/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/vmalloc.h>
#include <linux/count_zeros.h>
#include <rdma/ib_umem.h>
#include <linux/math.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"

static u32 hw_index_to_key(int ind)
{
	return ((u32)ind >> 24) | ((u32)ind << 8);
}

unsigned long key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}

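/*
 * Note: hw_index_to_key() rotates the 32-bit MPT index left by 8 bits and
 * key_to_hw_index() rotates right by 8 bits, so the two transforms are exact
 * inverses of each other within the low 32 bits.
 */
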
static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int err;
	int id;

	/* Allocate a key for mr from mr_table */
	id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
			     GFP_KERNEL);
	if (id < 0) {
		ibdev_err(ibdev, "failed to alloc id for MR key, id(%d)\n", id);
		return -ENOMEM;
	}

	mr->key = hw_index_to_key(id); /* MR key */

	err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
				 (unsigned long)id);
	if (err) {
		ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
		goto err_free_bitmap;
	}

	return 0;

err_free_bitmap:
	ida_free(&mtpt_ida->ida, id);
	return err;
}

static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	unsigned long obj = key_to_hw_index(mr->key);

	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
	ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
}

static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
			struct ib_udata *udata, u64 start)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	bool is_fast = mr->type == MR_TYPE_FRMR;
	struct hns_roce_buf_attr buf_attr = {};
	int err;

	mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
	buf_attr.page_shift = is_fast ? PAGE_SHIFT :
			      hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
	buf_attr.region[0].size = mr->size;
	buf_attr.region[0].hopnum = mr->pbl_hop_num;
	buf_attr.region_count = 1;
	buf_attr.user_access = mr->access;
	/* a fast MR's buffer is provided by its user before mapping, not at creation */
	buf_attr.mtt_only = is_fast;
	buf_attr.iova = mr->iova;
	/* page size and hop num are fixed for a fast MR */
	buf_attr.adaptive = !is_fast;
	buf_attr.type = MTR_PBL;

	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
				  hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
				  udata, start);
	if (err) {
		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
		return err;
	}

	mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
	mr->pbl_hop_num = buf_attr.region[0].hopnum;

	return err;
}

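/*
 * An MR's PBL (physical buffer list) is just an MTR of type MTR_PBL. For a
 * normal MR the user buffer is pinned inside hns_roce_mtr_create() and
 * 'adaptive' lets the MTR pick the best page size and hop number; for a fast
 * MR (FRMR) only the MTT space is reserved ('mtt_only') and the page
 * addresses are filled in later by hns_roce_map_mr_sg().
 */
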
static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
}

static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int ret;

	if (mr->enabled) {
		ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
					      key_to_hw_index(mr->key) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
					       ret);
	}

	free_mr_pbl(hr_dev, mr);
	free_mr_key(hr_dev, mr);
}

static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mr *mr)
{
	unsigned long mtpt_idx = key_to_hw_index(mr->key);
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	int ret;

	/* Allocate mailbox memory */
	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	if (mr->type != MR_TYPE_FRMR)
		ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
	else
		ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
	if (ret) {
		dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
		goto err_page;
	}

	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
		goto err_page;
	}

	mr->enabled = 1;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ret;
}

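/*
 * hns_roce_mr_enable() makes an MR visible to hardware: the MPT entry is
 * built in a mailbox buffer (write_mtpt() or frmr_write_mtpt()) and then
 * pushed to the ROCEE with a CREATE_MPT mailbox command. The index written
 * to hardware is masked with (num_mtpts - 1), which assumes num_mtpts is a
 * power of two.
 */
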
void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;

	ida_init(&mtpt_ida->ida);
	mtpt_ida->max = hr_dev->caps.num_mtpts - 1;
	mtpt_ida->min = hr_dev->caps.reserved_mrws;
}

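/*
 * Illustrative example (values are hypothetical, not taken from any
 * particular hardware): with caps.num_mtpts = 0x10000 and
 * caps.reserved_mrws = 1, the IDA hands out MPT indexes in [1, 0xffff] and
 * never returns an index below reserved_mrws.
 */
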
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_DMA;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->access = acc;

	/* Allocate memory region key */
	hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_free;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

	return &mr->ibmr;

err_mr:
	free_mr_key(hr_dev, mr);

err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				   u64 virt_addr, int access_flags,
				   struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		ret = -ENOMEM;
		goto err_out;
	}

	mr->iova = virt_addr;
	mr->size = length;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->access = access_flags;
	mr->type = MR_TYPE_MR;

	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_alloc_mr;

	ret = alloc_mr_pbl(hr_dev, mr, udata, start);
	if (ret)
		goto err_alloc_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_alloc_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

	return &mr->ibmr;

err_alloc_pbl:
	free_mr_pbl(hr_dev, mr);
err_alloc_key:
	free_mr_key(hr_dev, mr);
err_alloc_mr:
	kfree(mr);
err_out:
	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]);

	return ERR_PTR(ret);
}

struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
				     u64 length, u64 virt_addr,
				     int mr_access_flags, struct ib_pd *pd,
				     struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ib_dev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_cmd_mailbox *mailbox;
	unsigned long mtpt_idx;
	int ret;

	if (!mr->enabled) {
		ret = -EINVAL;
		goto err_out;
	}

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	ret = PTR_ERR_OR_ZERO(mailbox);
	if (ret)
		goto err_out;

	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);

	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
				mtpt_idx);
	if (ret)
		goto free_cmd_mbox;

	ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
				      mtpt_idx);
	if (ret)
		ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);

	mr->enabled = 0;
	mr->iova = virt_addr;
	mr->size = length;

	if (flags & IB_MR_REREG_PD)
		mr->pd = to_hr_pd(pd)->pdn;

	if (flags & IB_MR_REREG_ACCESS)
		mr->access = mr_access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		free_mr_pbl(hr_dev, mr);
		ret = alloc_mr_pbl(hr_dev, mr, udata, start);
		if (ret) {
			ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
				  ret);
			goto free_cmd_mbox;
		}
	}

	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
	if (ret) {
		ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
		goto free_cmd_mbox;
	}

	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
				     mtpt_idx);
	if (ret) {
		ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
		goto free_cmd_mbox;
	}

	mr->enabled = 1;

free_cmd_mbox:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_out:
	if (ret)
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]);

	return ret ? ERR_PTR(ret) : NULL;
}

int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	if (hr_dev->hw->dereg_mr)
		hr_dev->hw->dereg_mr(hr_dev);

	hns_roce_mr_free(hr_dev, mr);
	kfree(mr);

	return 0;
}

struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct device *dev = hr_dev->dev;
	struct hns_roce_mr *mr;
	int ret;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
		dev_err(dev, "max_num_sg larger than %d\n",
			HNS_ROCE_FRMR_MAX_PA);
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_FRMR;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->size = max_num_sg * (1 << PAGE_SHIFT);

	/* Allocate memory region key */
	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_free;

	ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
	if (ret)
		goto err_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->ibmr.length = mr->size;

	return &mr->ibmr;

err_pbl:
	free_mr_pbl(hr_dev, mr);
err_key:
	free_mr_key(hr_dev, mr);
err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
		mr->page_list[mr->npages++] = addr;
		return 0;
	}

	return -ENOBUFS;
}

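/*
 * hns_roce_set_page() is the per-page callback handed to ib_sg_to_pages()
 * below; it collects each page address into mr->page_list until the PBL
 * capacity (buf_pg_count) is reached.
 */
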
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		       unsigned int *sg_offset_p)
{
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_mtr *mtr = &mr->pbl_mtr;
	int ret, sg_num = 0;

	if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
	    ibmr->page_size < HNS_HW_PAGE_SIZE ||
	    ibmr->page_size > HNS_HW_MAX_PAGE_SIZE)
		return sg_num;

	mr->npages = 0;
	mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
				 sizeof(dma_addr_t), GFP_KERNEL);
	if (!mr->page_list)
		return sg_num;

	sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page);
	if (sg_num < 1) {
		ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
			  mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
		goto err_page_list;
	}

	mtr->hem_cfg.region[0].offset = 0;
	mtr->hem_cfg.region[0].count = mr->npages;
	mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
	mtr->hem_cfg.region_count = 1;
	ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
	if (ret) {
		ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
		sg_num = 0;
	} else {
		mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
	}

err_page_list:
	kvfree(mr->page_list);
	mr->page_list = NULL;

	return sg_num;
}

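/*
 * Fast-registration flow as seen from a ULP (sketch only; 'mr', 'sg' and
 * 'sg_nents' are the caller's objects and are not defined here):
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
 *	...
 *	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
 *
 * ib_map_mr_sg() ends up in hns_roce_map_mr_sg() above, which fills the PBL;
 * the registration itself is made effective later by posting an
 * IB_WR_REG_MR work request.
 */
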
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mw *mw)
{
	struct device *dev = hr_dev->dev;
	int ret;

	if (mw->enabled) {
		ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
					      key_to_hw_index(mw->rkey) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);

		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
				   key_to_hw_index(mw->rkey));
	}

	ida_free(&hr_dev->mr_table.mtpt_ida.ida,
		 (int)key_to_hw_index(mw->rkey));
}

static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mw *mw)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
	int ret;

	/* prepare HEM entry memory */
	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	if (ret)
		return ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		goto err_table;
	}

	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
	if (ret) {
		dev_err(dev, "MW write mtpt fail!\n");
		goto err_page;
	}

	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
		goto err_page;
	}

	mw->enabled = 1;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);

	return ret;
}

int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_mw *mw = to_hr_mw(ibmw);
	int ret;
	int id;

	/* Allocate a key for mw from mr_table */
	id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
			     GFP_KERNEL);
	if (id < 0) {
		ibdev_err(ibdev, "failed to alloc id for MW key, id(%d)\n", id);
		return -ENOMEM;
	}

	mw->rkey = hw_index_to_key(id);

	ibmw->rkey = mw->rkey;
	mw->pdn = to_hr_pd(ibmw->pd)->pdn;
	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

	ret = hns_roce_mw_enable(hr_dev, mw);
	if (ret)
		goto err_mw;

	return 0;

err_mw:
	hns_roce_mw_free(hr_dev, mw);
	return ret;
}

int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);

	hns_roce_mw_free(hr_dev, mw);

	return 0;
}

static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  struct hns_roce_buf_region *region, dma_addr_t *pages,
			  int max_count)
{
	int count, npage;
	int offset, end;
	__le64 *mtts;
	u64 addr;
	int i;

	offset = region->offset;
	end = offset + region->count;
	npage = 0;
	while (offset < end && npage < max_count) {
		count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  offset, &count);
		if (!mtts)
			return -ENOBUFS;

		for (i = 0; i < count && npage < max_count; i++) {
			addr = pages[npage];

			mtts[i] = cpu_to_le64(addr);
			npage++;
		}
		offset += count;
	}

	return npage;
}

static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
{
	int i;

	for (i = 0; i < attr->region_count; i++)
		if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
		    attr->region[i].hopnum > 0)
			return true;

	/* Because the mtr has only one root base address, a hopnum of 0 means
	 * the root base address equals the first buffer address, so all the
	 * allocated memory must lie in one continuous space accessed in
	 * direct mode.
	 */
	return false;
}

static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
{
	size_t size = 0;
	int i;

	for (i = 0; i < attr->region_count; i++)
		size += attr->region[i].size;

	return size;
}

/*
 * Check whether the given pages lie in one continuous address space.
 * Returns 0 on success, or the index of the first non-contiguous page.
 */
static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
					 unsigned int page_shift)
{
	size_t page_size = 1 << page_shift;
	int i;

	for (i = 1; i < page_count; i++)
		if (pages[i] - pages[i - 1] != page_size)
			return i;

	return 0;
}

static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release user buffers */
	if (mtr->umem) {
		ib_umem_release(mtr->umem);
		mtr->umem = NULL;
	}

	/* release kernel buffers */
	if (mtr->kmem) {
		hns_roce_buf_free(hr_dev, mtr->kmem);
		mtr->kmem = NULL;
	}
}

static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  struct hns_roce_buf_attr *buf_attr,
			  struct ib_udata *udata, unsigned long user_addr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	size_t total_size;

	total_size = mtr_bufs_size(buf_attr);

	if (udata) {
		mtr->kmem = NULL;
		mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
					buf_attr->user_access);
		if (IS_ERR(mtr->umem)) {
			ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
				  PTR_ERR(mtr->umem));
			return -ENOMEM;
		}
	} else {
		mtr->umem = NULL;
		mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
					       buf_attr->page_shift,
					       !mtr_has_mtt(buf_attr) ?
					       HNS_ROCE_BUF_DIRECT : 0);
		if (IS_ERR(mtr->kmem)) {
			ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
				  PTR_ERR(mtr->kmem));
			return PTR_ERR(mtr->kmem);
		}
	}

	return 0;
}

static int cal_mtr_pg_cnt(struct hns_roce_mtr *mtr)
{
	struct hns_roce_buf_region *region;
	int page_cnt = 0;
	int i;

	for (i = 0; i < mtr->hem_cfg.region_count; i++) {
		region = &mtr->hem_cfg.region[i];
		page_cnt += region->count;
	}

	return page_cnt;
}

static bool need_split_huge_page(struct hns_roce_mtr *mtr)
{
	/* When the HEM buffer uses 0-level addressing, the page size is
	 * equal to the whole buffer size. If the current MTR has multiple
	 * regions, we split the buffer into small pages (4K, required by hns
	 * ROCEE). These pages will be used in multiple regions.
	 */
	return mtr->hem_cfg.is_direct && mtr->hem_cfg.region_count > 1;
}

static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int page_count = cal_mtr_pg_cnt(mtr);
	unsigned int page_shift;
	dma_addr_t *pages;
	int npage;
	int ret;

	page_shift = need_split_huge_page(mtr) ? HNS_HW_PAGE_SHIFT :
						 mtr->hem_cfg.buf_pg_shift;
	/* alloc a tmp array to store the buffer's dma addresses */
	pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	if (mtr->umem)
		npage = hns_roce_get_umem_bufs(pages, page_count,
					       mtr->umem, page_shift);
	else
		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
					       mtr->kmem, page_shift);

	if (npage != page_count) {
		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
			  page_count);
		ret = -ENOBUFS;
		goto err_alloc_list;
	}

	if (need_split_huge_page(mtr) && npage > 1) {
		ret = mtr_check_direct_pages(pages, npage, page_shift);
		if (ret) {
			ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
				  mtr->umem ? "umtr" : "kmtr", ret, npage);
			ret = -ENOBUFS;
			goto err_alloc_list;
		}
	}

	ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
	if (ret)
		ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);

err_alloc_list:
	kvfree(pages);

	return ret;
}

int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		     dma_addr_t *pages, unsigned int page_cnt)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_region *r;
	unsigned int i, mapped_cnt;
	int ret = 0;

	/*
	 * Only use the first page address as root ba when hopnum is 0, this
	 * is because the addresses of all pages are consecutive in this case.
	 */
	if (mtr->hem_cfg.is_direct) {
		mtr->hem_cfg.root_ba = pages[0];
		return 0;
	}

	for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
	     mapped_cnt < page_cnt; i++) {
		r = &mtr->hem_cfg.region[i];
		/* if hopnum is 0, no need to map pages in this region */
		if (!r->hopnum) {
			mapped_cnt += r->count;
			continue;
		}

		if (r->offset + r->count > page_cnt) {
			ret = -EINVAL;
			ibdev_err(ibdev,
				  "failed to check mtr%u count %u + %u > %u.\n",
				  i, r->offset, r->count, page_cnt);
			return ret;
		}

		ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
				     page_cnt - mapped_cnt);
		if (ret < 0) {
			ibdev_err(ibdev,
				  "failed to map mtr%u offset %u, ret = %d.\n",
				  i, r->offset, ret);
			return ret;
		}
		mapped_cnt += ret;
		ret = 0;
	}

	if (mapped_cnt < page_cnt) {
		ret = -ENOBUFS;
		ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
			  mapped_cnt, page_cnt);
	}

	return ret;
}

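/*
 * Two cases are handled above: in direct mode there is no MTT at all and
 * only the first page address is kept as the root BA; in multi-hop mode
 * every region with a non-zero hopnum gets its page addresses written into
 * the MTT entries looked up through the hem_list.
 */
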
static int hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg,
					u32 start_index, u64 *mtt_buf,
					int mtt_cnt)
{
	int mtt_count;
	int total = 0;
	u32 npage;
	u64 addr;

	if (mtt_cnt > cfg->region_count)
		return -EINVAL;

	for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt;
	     mtt_count++) {
		npage = cfg->region[mtt_count].offset;
		if (npage < start_index)
			continue;

		addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
		mtt_buf[total] = addr;

		total++;
	}

	if (!total)
		return -ENOENT;

	return 0;
}

static int hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev,
				 struct hns_roce_mtr *mtr, u32 start_index,
				 u64 *mtt_buf, int mtt_cnt)
{
	int left = mtt_cnt;
	int total = 0;
	int mtt_count;
	__le64 *mtts;
	u32 npage;

	while (left > 0) {
		mtt_count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  start_index + total,
						  &mtt_count);
		if (!mtts || !mtt_count)
			break;

		npage = min(mtt_count, left);
		left -= npage;
		for (mtt_count = 0; mtt_count < npage; mtt_count++)
			mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
	}

	if (!total)
		return -ENOENT;

	return 0;
}

int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		      u32 offset, u64 *mtt_buf, int mtt_max)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	u32 start_index;
	int ret;

	if (!mtt_buf || mtt_max < 1)
		return -EINVAL;

	/* no mtt memory in direct mode, so just return the buffer address */
	if (cfg->is_direct) {
		start_index = offset >> HNS_HW_PAGE_SHIFT;
		ret = hns_roce_get_direct_addr_mtt(cfg, start_index,
						   mtt_buf, mtt_max);
	} else {
		start_index = offset >> cfg->buf_pg_shift;
		ret = hns_roce_get_mhop_mtt(hr_dev, mtr, start_index,
					    mtt_buf, mtt_max);
	}

	return ret;
}

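/*
 * hns_roce_mtr_find() is the read-side helper: a caller (e.g. the hw layer
 * when filling queue contexts) passes a byte offset into the buffer and gets
 * back up to mtt_max base addresses, derived from root_ba in direct mode or
 * read back from the MTT in multi-hop mode.
 */
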
static int get_best_page_shift(struct hns_roce_dev *hr_dev,
			       struct hns_roce_mtr *mtr,
			       struct hns_roce_buf_attr *buf_attr)
{
	unsigned int page_sz;

	if (!buf_attr->adaptive || buf_attr->type != MTR_PBL || !mtr->umem)
		return 0;

	page_sz = ib_umem_find_best_pgsz(mtr->umem,
					 hr_dev->caps.page_size_cap,
					 buf_attr->iova);
	if (!page_sz)
		return -EINVAL;

	buf_attr->page_shift = order_base_2(page_sz);
	return 0;
}

static int get_best_hop_num(struct hns_roce_dev *hr_dev,
			    struct hns_roce_mtr *mtr,
			    struct hns_roce_buf_attr *buf_attr,
			    unsigned int ba_pg_shift)
{
#define INVALID_HOPNUM -1
#define MIN_BA_CNT 1
	size_t buf_pg_sz = 1 << buf_attr->page_shift;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	size_t ba_pg_sz = 1 << ba_pg_shift;
	int hop_num = INVALID_HOPNUM;
	size_t unit = MIN_BA_CNT;
	size_t ba_cnt;
	int j;

	if (!buf_attr->adaptive || buf_attr->type != MTR_PBL)
		return 0;

	/* Calculate the number of buf pages; each buf page needs a BA */
	if (mtr->umem)
		ba_cnt = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
	else
		ba_cnt = DIV_ROUND_UP(buf_attr->region[0].size, buf_pg_sz);

	for (j = 0; j <= HNS_ROCE_MAX_HOP_NUM; j++) {
		if (ba_cnt <= unit) {
			hop_num = j;
			break;
		}
		/* Number of BAs that can be represented per hop */
		unit *= ba_pg_sz / BA_BYTE_LEN;
	}

	if (hop_num < 0) {
		ibdev_err(ibdev,
			  "failed to calculate a valid hopnum.\n");
		return -EINVAL;
	}

	buf_attr->region[0].hopnum = hop_num;

	return 0;
}

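/*
 * Worked example for the loop above, assuming a 4 KB BA page (ba_pg_sz =
 * 4096 and BA_BYTE_LEN = 8, i.e. 512 BAs per BA page): hop 0 covers a single
 * buffer page, hop 1 covers up to 512 pages, hop 2 up to 512 * 512, and so
 * on, so the smallest hop number whose capacity reaches ba_cnt is selected.
 */
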
static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
			      struct hns_roce_buf_attr *attr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;

	if (attr->region_count > ARRAY_SIZE(attr->region) ||
	    attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
		ibdev_err(ibdev,
			  "invalid buf attr, region count %d, page shift %u.\n",
			  attr->region_count, attr->page_shift);
		return false;
	}

	return true;
}

static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
			    struct hns_roce_mtr *mtr,
			    struct hns_roce_buf_attr *attr)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	struct hns_roce_buf_region *r;
	size_t buf_pg_sz;
	size_t buf_size;
	int page_cnt, i;
	u64 pgoff = 0;

	if (!is_buf_attr_valid(hr_dev, attr))
		return -EINVAL;

	/* If mtt is disabled, all pages must be within a continuous range */
	cfg->is_direct = !mtr_has_mtt(attr);
	cfg->region_count = attr->region_count;
	buf_size = mtr_bufs_size(attr);
	if (need_split_huge_page(mtr)) {
		buf_pg_sz = HNS_HW_PAGE_SIZE;
		cfg->buf_pg_count = 1;
		/* The ROCEE requires the page size to be 4K * 2 ^ N. */
		cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
			order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
	} else {
		buf_pg_sz = 1 << attr->page_shift;
		cfg->buf_pg_count = mtr->umem ?
			ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz) :
			DIV_ROUND_UP(buf_size, buf_pg_sz);
		cfg->buf_pg_shift = attr->page_shift;
		pgoff = mtr->umem ? mtr->umem->address & ~PAGE_MASK : 0;
	}

	/* Convert buffer size to page index and page count for each region and
	 * the buffer's offset needs to be appended to the first region.
	 */
	for (page_cnt = 0, i = 0; i < attr->region_count; i++) {
		r = &cfg->region[i];
		r->offset = page_cnt;
		buf_size = hr_hw_page_align(attr->region[i].size + pgoff);
		if (attr->type == MTR_PBL && mtr->umem)
			r->count = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
		else
			r->count = DIV_ROUND_UP(buf_size, buf_pg_sz);

		pgoff = 0;
		page_cnt += r->count;
		r->hopnum = to_hr_hem_hopnum(attr->region[i].hopnum, r->count);
	}

	return 0;
}

static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
{
	return int_pow(ba_per_bt, hopnum - 1);
}

static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
				      struct hns_roce_mtr *mtr,
				      unsigned int pg_shift)
{
	unsigned long cap = hr_dev->caps.page_size_cap;
	struct hns_roce_buf_region *re;
	unsigned int pgs_per_l1ba;
	unsigned int ba_per_bt;
	unsigned int ba_num;
	int i;

	for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
		if (!(BIT(pg_shift) & cap))
			continue;

		ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
		ba_num = 0;
		for (i = 0; i < mtr->hem_cfg.region_count; i++) {
			re = &mtr->hem_cfg.region[i];
			if (re->hopnum == 0)
				continue;

			pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
			ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
		}

		if (ba_num <= ba_per_bt)
			return pg_shift;
	}

	return 0;
}

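/*
 * In other words, cal_best_bt_pg_sz() walks the supported BA page sizes from
 * 'pg_shift' upwards and returns the first one for which the first-hop BAs
 * of all multi-hop regions fit into a single BA table page; a return value
 * of 0 means no supported page size is large enough.
 */
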
static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			 unsigned int ba_page_shift)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	int ret;

	hns_roce_hem_list_init(&mtr->hem_list);
	if (!cfg->is_direct) {
		ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
		if (!ba_page_shift)
			return -ERANGE;

		ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
						cfg->region, cfg->region_count,
						ba_page_shift);
		if (ret)
			return ret;
		cfg->root_ba = mtr->hem_list.root_ba;
		cfg->ba_pg_shift = ba_page_shift;
	} else {
		cfg->ba_pg_shift = cfg->buf_pg_shift;
	}

	return 0;
}

static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
}

/**
 * hns_roce_mtr_create - Create hns memory translate region.
 *
 * @hr_dev: RoCE device struct pointer
 * @mtr: memory translate region
 * @buf_attr: buffer attribute for creating mtr
 * @ba_page_shift: page shift for multi-hop base address table
 * @udata: user space context, if it's NULL, means kernel space
 * @user_addr: userspace virtual address to start at
 */
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			struct hns_roce_buf_attr *buf_attr,
			unsigned int ba_page_shift, struct ib_udata *udata,
			unsigned long user_addr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int ret;

	/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
	 * to finish the MTT configuration.
	 */
	if (buf_attr->mtt_only) {
		mtr->umem = NULL;
		mtr->kmem = NULL;
	} else {
		ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
		if (ret) {
			ibdev_err(ibdev,
				  "failed to alloc mtr bufs, ret = %d.\n", ret);
			return ret;
		}

		ret = get_best_page_shift(hr_dev, mtr, buf_attr);
		if (ret)
			goto err_init_buf;

		ret = get_best_hop_num(hr_dev, mtr, buf_attr, ba_page_shift);
		if (ret)
			goto err_init_buf;
	}

	ret = mtr_init_buf_cfg(hr_dev, mtr, buf_attr);
	if (ret)
		goto err_init_buf;

	ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
		goto err_init_buf;
	}

	if (buf_attr->mtt_only)
		return 0;

	/* Write buffer's dma address to MTT */
	ret = mtr_map_bufs(hr_dev, mtr);
	if (ret) {
		ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
		goto err_alloc_mtt;
	}

	return 0;

err_alloc_mtt:
	mtr_free_mtt(hr_dev, mtr);
err_init_buf:
	mtr_free_bufs(hr_dev, mtr);

	return ret;
}

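/*
 * A representative caller in this file is alloc_mr_pbl(): it fills a
 * hns_roce_buf_attr describing one MTR_PBL region and then calls
 * hns_roce_mtr_create() with the PBL BA page shift, so the whole
 * pin-buffers / pick-page-size / build-MTT sequence above runs in one call.
 */
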
void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release multi-hop addressing resource */
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);

	/* free buffers */
	mtr_free_bufs(hr_dev, mtr);
}