/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/slab.h>

#include <linux/mlx4/cmd.h>

#include "mlx4.h"
#include "icm.h"
/*
 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
struct mlx4_mpt_entry {
        __be32 flags;
        __be32 qpn;
        __be32 key;
        __be32 pd_flags;
        __be64 start;
        __be64 length;
        __be32 lkey;
        __be32 win_cnt;
        u8     reserved1[3];
        u8     mtt_rep;
        __be64 mtt_seg;
        __be32 mtt_sz;
        __be32 entity_size;
        __be32 first_byte_offset;
} __packed;
#define MLX4_MPT_FLAG_SW_OWNS       (0xfUL << 28)
#define MLX4_MPT_FLAG_FREE          (0x3UL << 28)
#define MLX4_MPT_FLAG_MIO           (1 << 17)
#define MLX4_MPT_FLAG_BIND_ENABLE   (1 << 15)
#define MLX4_MPT_FLAG_PHYSICAL      (1 <<  9)
#define MLX4_MPT_FLAG_REGION        (1 <<  8)

#define MLX4_MPT_PD_FLAG_FAST_REG   (1 << 27)
#define MLX4_MPT_PD_FLAG_RAE        (1 << 28)
#define MLX4_MPT_PD_FLAG_EN_INV     (3 << 24)

#define MLX4_MPT_STATUS_SW          0xF0
#define MLX4_MPT_STATUS_HW          0x00
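/*
 * Buddy allocator for MTT segments.  mlx4_buddy_alloc() scans upward
 * from the requested order for the first order with a free block,
 * claims it, then splits back down, marking each unused buddy half
 * free along the way; the result is returned scaled by the order.
 */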
static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
{
        int o;
        int m;
        u32 seg;

        spin_lock(&buddy->lock);

        for (o = order; o <= buddy->max_order; ++o)
                if (buddy->num_free[o]) {
                        m = 1 << (buddy->max_order - o);
                        seg = find_first_bit(buddy->bits[o], m);
                        if (seg < m)
                                goto found;
                }

        spin_unlock(&buddy->lock);
        return -1;

 found:
        clear_bit(seg, buddy->bits[o]);
        --buddy->num_free[o];

        while (o > order) {
                --o;
                seg <<= 1;
                set_bit(seg ^ 1, buddy->bits[o]);
                ++buddy->num_free[o];
        }

        spin_unlock(&buddy->lock);

        seg <<= order;

        return seg;
}
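/*
 * Free a block of the given order: while the block's buddy is also
 * free, absorb it and move up one order, then mark the merged block
 * free.  This is the inverse of the split in mlx4_buddy_alloc().
 */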
static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
{
        seg >>= order;

        spin_lock(&buddy->lock);

        while (test_bit(seg ^ 1, buddy->bits[order])) {
                clear_bit(seg ^ 1, buddy->bits[order]);
                --buddy->num_free[order];
                seg >>= 1;
                ++order;
        }

        set_bit(seg, buddy->bits[order]);
        ++buddy->num_free[order];

        spin_unlock(&buddy->lock);
}
static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
{
        int i, s;

        buddy->max_order = max_order;
        spin_lock_init(&buddy->lock);

        buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
                              GFP_KERNEL);
        buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
                                  GFP_KERNEL);
        if (!buddy->bits || !buddy->num_free)
                goto err_out;

        for (i = 0; i <= buddy->max_order; ++i) {
                s = BITS_TO_LONGS(1 << (buddy->max_order - i));
                buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
                if (!buddy->bits[i])
                        goto err_out_free;
                bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
        }

        set_bit(0, buddy->bits[buddy->max_order]);
        buddy->num_free[buddy->max_order] = 1;

        return 0;

err_out_free:
        for (i = 0; i <= buddy->max_order; ++i)
                kfree(buddy->bits[i]);

err_out:
        kfree(buddy->bits);
        kfree(buddy->num_free);

        return -ENOMEM;
}
static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
{
        int i;

        for (i = 0; i <= buddy->max_order; ++i)
                kfree(buddy->bits[i]);

        kfree(buddy->bits);
        kfree(buddy->num_free);
}
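/*
 * Reserve 1 << order MTT segments from the buddy and pin the ICM
 * pages backing them; if the ICM mapping fails, the segments go back
 * to the buddy and -1 is returned.
 */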
static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
        struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
        u32 seg;

        seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
        if (seg == -1)
                return -1;

        if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
                                 seg + (1 << order) - 1)) {
                mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
                return -1;
        }

        return seg;
}
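/*
 * Size an MTT: mtt->order is the log2 count of MTT segments (of
 * dev->caps.mtts_per_seg entries each) needed to cover npages.
 * npages == 0 denotes a physically contiguous region, marked with
 * order -1 so that no MTT segments are allocated for it.
 */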
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
                  struct mlx4_mtt *mtt)
{
        int i;

        if (!npages) {
                mtt->order      = -1;
                mtt->page_shift = MLX4_ICM_PAGE_SHIFT;
                return 0;
        } else
                mtt->page_shift = page_shift;

        for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
                ++mtt->order;

        mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
        if (mtt->first_seg == -1)
                return -ENOMEM;

        return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_init);
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
        struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;

        if (mtt->order < 0)
                return;

        mlx4_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
        mlx4_table_put_range(dev, &mr_table->mtt_table, mtt->first_seg,
                             mtt->first_seg + (1 << mtt->order) - 1);
}
EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
        return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
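/*
 * A memory key is the MPT index rotated left by 8 bits, so the
 * index's top byte ends up in the key's low byte; mlx4_map_phys_fmr()
 * relies on this when it steps the index by num_mpts to produce a
 * fresh key on each remap.  key_to_hw_index() is the inverse rotation.
 */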
static u32 hw_index_to_key(u32 ind)
{
        return (ind >> 24) | (ind << 8);
}

static u32 key_to_hw_index(u32 key)
{
        return (key << 24) | (key >> 8);
}
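/*
 * Firmware commands: SW2HW_MPT hands a software-built MPT entry to
 * the HCA, and HW2SW_MPT takes ownership back (a NULL mailbox
 * discards the returned entry).
 */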
static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
                          int mpt_index)
{
        return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
                        MLX4_CMD_TIME_CLASS_B);
}

static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
                          int mpt_index)
{
        return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
                            !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
}
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
                  int npages, int page_shift, struct mlx4_mr *mr)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u32 index;
        int err;

        index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
        if (index == -1)
                return -ENOMEM;

        mr->iova    = iova;
        mr->size    = size;
        mr->pd      = pd;
        mr->access  = access;
        mr->enabled = 0;
        mr->key     = hw_index_to_key(index);

        err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
        if (err)
                mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);

        return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        if (mr->enabled) {
                err = mlx4_HW2SW_MPT(dev, NULL,
                                     key_to_hw_index(mr->key) &
                                     (dev->caps.num_mpts - 1));
                if (err)
                        mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);
        }

        mlx4_mtt_cleanup(dev, &mr->mtt);
        mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);
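/*
 * Build the MPT entry in a mailbox and hand it to the HCA.  MRs with
 * no MTT (order < 0) are flagged PHYSICAL; an MTT with page_shift 0
 * indicates a fast-register MR, which is left in the FREE state for
 * the consumer to claim, while all other MRs get MLX4_MPT_FLAG_SW_OWNS.
 */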
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
        struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
        struct mlx4_cmd_mailbox *mailbox;
        struct mlx4_mpt_entry *mpt_entry;
        int err;

        err = mlx4_table_get(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
        if (err)
                return err;

        mailbox = mlx4_alloc_cmd_mailbox(dev);
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);
                goto err_table;
        }
        mpt_entry = mailbox->buf;

        memset(mpt_entry, 0, sizeof *mpt_entry);

        mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO    |
                                       MLX4_MPT_FLAG_REGION |
                                       mr->access);

        mpt_entry->key         = cpu_to_be32(key_to_hw_index(mr->key));
        mpt_entry->pd_flags    = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
        mpt_entry->start       = cpu_to_be64(mr->iova);
        mpt_entry->length      = cpu_to_be64(mr->size);
        mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);

        if (mr->mtt.order < 0) {
                mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
                mpt_entry->mtt_seg = 0;
        } else
                mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));

        if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
                /* fast register MR in free state */
                mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
                mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
                                                   MLX4_MPT_PD_FLAG_RAE);
                mpt_entry->mtt_sz    = cpu_to_be32((1 << mr->mtt.order) *
                                                   dev->caps.mtts_per_seg);
        } else {
                mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
        }

        err = mlx4_SW2HW_MPT(dev, mailbox,
                             key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
        if (err) {
                mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
                goto err_cmd;
        }

        mr->enabled = 1;

        mlx4_free_cmd_mailbox(dev, mailbox);

        return 0;

err_cmd:
        mlx4_free_cmd_mailbox(dev, mailbox);

err_table:
        mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
        return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_enable);
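/*
 * Write npages MTT entries at start_index directly into the ICM
 * table: a chunk may not straddle an ICM page and must begin on an
 * MTT segment boundary.  The page is synced for CPU access, filled
 * with addresses tagged MLX4_MTT_FLAG_PRESENT, and synced back to
 * the device.
 */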
static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                                int start_index, int npages, u64 *page_list)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        __be64 *mtts;
        dma_addr_t dma_handle;
        int i;
        int s = start_index * sizeof (u64);

        /* All MTTs must fit in the same page */
        if (start_index / (PAGE_SIZE / sizeof (u64)) !=
            (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
                return -EINVAL;

        if (start_index & (dev->caps.mtts_per_seg - 1))
                return -EINVAL;

        mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
                               s / dev->caps.mtt_entry_sz, &dma_handle);
        if (!mtts)
                return -ENOMEM;

        dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
                                npages * sizeof (u64), DMA_TO_DEVICE);

        for (i = 0; i < npages; ++i)
                mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);

        dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
                                   npages * sizeof (u64), DMA_TO_DEVICE);

        return 0;
}
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                   int start_index, int npages, u64 *page_list)
{
        int chunk;
        int err;

        if (mtt->order < 0)
                return -EINVAL;

        while (npages > 0) {
                chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
                err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
                if (err)
                        return err;

                npages      -= chunk;
                start_index += chunk;
                page_list   += chunk;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(mlx4_write_mtt);
int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                       struct mlx4_buf *buf)
{
        u64 *page_list;
        int err;
        int i;

        page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL);
        if (!page_list)
                return -ENOMEM;

        for (i = 0; i < buf->npages; ++i)
                if (buf->nbufs == 1)
                        page_list[i] = buf->direct.map + (i << buf->page_shift);
                else
                        page_list[i] = buf->page_list[i].map;

        err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);

        kfree(page_list);
        return err;
}
EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
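/*
 * Initialize the MR table: an MPT bitmap for key indices and an MTT
 * buddy covering num_mtt_segs segments.  Firmware-reserved MTTs are
 * carved out up front by allocating the power-of-two range that
 * covers them.
 */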
int mlx4_init_mr_table(struct mlx4_dev *dev)
{
        struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
        int err;

        err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
                               ~0, dev->caps.reserved_mrws, 0);
        if (err)
                return err;

        err = mlx4_buddy_init(&mr_table->mtt_buddy,
                              ilog2(dev->caps.num_mtt_segs));
        if (err)
                goto err_buddy;

        if (dev->caps.reserved_mtts) {
                if (mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)) == -1) {
                        mlx4_warn(dev, "MTT table of order %d is too small.\n",
                                  mr_table->mtt_buddy.max_order);
                        err = -ENOMEM;
                        goto err_reserve_mtts;
                }
        }

        return 0;

err_reserve_mtts:
        mlx4_buddy_cleanup(&mr_table->mtt_buddy);

err_buddy:
        mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);

        return err;
}
void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
{
        struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;

        mlx4_buddy_cleanup(&mr_table->mtt_buddy);
        mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
}
static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
                                 int npages, u64 iova)
{
        int page_mask;
        int i;

        if (npages > fmr->max_pages)
                return -EINVAL;

        page_mask = (1 << fmr->page_shift) - 1;

        /* We are getting page lists, so va must be page aligned. */
        if (iova & page_mask)
                return -EINVAL;

        /* Trust the user not to pass misaligned data in page_list */
        if (0)
                for (i = 0; i < npages; ++i) {
                        if (page_list[i] & ~page_mask)
                                return -EINVAL;
                }

        if (fmr->maps >= fmr->max_maps)
                return -EINVAL;

        return 0;
}
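/*
 * Remap an FMR without firmware involvement: flip the MPT status
 * byte to SW ownership, rewrite the cached MTT entries and the
 * key/length/start fields in place, then flip the status back to HW
 * ownership.  The wmb() calls keep the status transitions ordered
 * against the entry updates so the HCA never observes a half-written
 * MPT.
 */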
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
                      int npages, u64 iova, u32 *lkey, u32 *rkey)
{
        u32 key;
        int i, err;

        err = mlx4_check_fmr(fmr, page_list, npages, iova);
        if (err)
                return err;

        ++fmr->maps;

        key = key_to_hw_index(fmr->mr.key);
        key += dev->caps.num_mpts;
        *lkey = *rkey = fmr->mr.key = hw_index_to_key(key);

        *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;

        /* Make sure MPT status is visible before writing MTT entries */
        wmb();

        dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
                                npages * sizeof(u64), DMA_TO_DEVICE);

        for (i = 0; i < npages; ++i)
                fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);

        dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
                                   npages * sizeof(u64), DMA_TO_DEVICE);

        fmr->mpt->key    = cpu_to_be32(key);
        fmr->mpt->lkey   = cpu_to_be32(key);
        fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
        fmr->mpt->start  = cpu_to_be64(iova);

        /* Make sure MTT entries are visible before setting MPT status */
        wmb();

        *(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW;

        /* Make sure MPT status is visible before consumer can use FMR */
        wmb();

        return 0;
}
EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
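/*
 * FMRs cache direct pointers into the ICM-backed MTT table (and,
 * after mlx4_fmr_enable(), the MPT table) so that remapping avoids
 * firmware commands entirely; that is also why all of an FMR's MTT
 * entries must fit within a single page.
 */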
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
                   int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 mtt_seg;
        int err = -ENOMEM;

        if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
                return -EINVAL;

        /* All MTTs must fit in the same page */
        if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
                return -EINVAL;

        fmr->page_shift = page_shift;
        fmr->max_pages  = max_pages;
        fmr->max_maps   = max_maps;
        fmr->maps       = 0;

        err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages,
                            page_shift, &fmr->mr);
        if (err)
                return err;

        mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;

        fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
                                    fmr->mr.mtt.first_seg,
                                    &fmr->dma_handle);
        if (!fmr->mtts) {
                err = -ENOMEM;
                goto err_free;
        }

        return 0;

err_free:
        mlx4_mr_free(dev, &fmr->mr);
        return err;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        err = mlx4_mr_enable(dev, &fmr->mr);
        if (err)
                return err;

        fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table,
                                   key_to_hw_index(fmr->mr.key), NULL);
        if (!fmr->mpt)
                return -ENOMEM;

        return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
                    u32 *lkey, u32 *rkey)
{
        if (!fmr->maps)
                return;

        fmr->maps = 0;

        *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
        if (fmr->maps)
                return -EBUSY;

        mlx4_mr_free(dev, &fmr->mr);

        return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_free);
int mlx4_SYNC_TPT(struct mlx4_dev *dev)
{
        return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
}
EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);