/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/rdma_counter.h>
#include <linux/mlx5/fs.h>
enum {
	MLX5_IB_ACK_REQ_FREQ	= 8,
};

enum {
	MLX5_IB_DEFAULT_SCHED_QUEUE	= 0x83,
	MLX5_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f,
	MLX5_IB_LINK_TYPE_IB		= 0,
	MLX5_IB_LINK_TYPE_ETH		= 1
};
enum raw_qp_set_mask_map {
	MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID		= 1UL << 0,
	MLX5_RAW_QP_RATE_LIMIT			= 1UL << 1,
};

struct mlx5_modify_raw_qp_param {
	u32 set_mask; /* raw_qp_set_mask_map */

	struct mlx5_rate_limit rl;
};
static void get_cqs(enum ib_qp_type qp_type,
		    struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
		    struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
static int is_qp0(enum ib_qp_type qp_type)
{
	return qp_type == IB_QPT_SMI;
}

static int is_sqp(enum ib_qp_type qp_type)
{
	return is_qp0(qp_type) || is_qp1(qp_type);
}
/**
 * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of it) from a user WQ
 * to a kernel buffer
 *
 * @umem: User space memory where the WQ is
 * @buffer: buffer to copy to
 * @buflen: buffer length
 * @wqe_index: index of WQE to copy from
 * @wq_offset: offset to start of WQ
 * @wq_wqe_cnt: number of WQEs in WQ
 * @wq_wqe_shift: log2 of WQE size
 * @bcnt: number of bytes to copy
 * @bytes_copied: number of bytes actually copied (output parameter)
 *
 * Copies at most bcnt bytes, starting from the beginning of the WQE.
 * Does not guarantee that the entire WQE is copied.
 *
 * Return: zero on success, or an error code.
 */
static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, void *buffer,
					size_t buflen, int wqe_index,
					int wq_offset, int wq_wqe_cnt,
					int wq_wqe_shift, int bcnt,
					size_t *bytes_copied)
{
	size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
	size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift);
	size_t copy_length;
	int ret;

	/* don't copy more than requested, more than buffer length or
	 * beyond WQ end
	 */
	copy_length = min_t(u32, buflen, wq_end - offset);
	copy_length = min_t(u32, copy_length, bcnt);

	ret = ib_umem_copy_from(buffer, umem, offset, copy_length);
	if (!ret && bytes_copied)
		*bytes_copied = copy_length;

	return ret;
}
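
/*
 * Worked example (hypothetical values, for illustration only): with
 * wq_offset = 0x1000, wq_wqe_shift = 6 (64-byte strides) and
 * wq_wqe_cnt = 256, wqe_index 300 wraps to slot 300 % 256 = 44, so the
 * copy starts at byte 0x1000 + (44 << 6) = 0x1b00 and copy_length is
 * clamped so the copy never crosses wq_end = 0x1000 + (256 << 6) nor
 * exceeds buflen or bcnt.
 */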
static int mlx5_ib_read_kernel_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
				      void *buffer, size_t buflen, size_t *bc)
{
	struct mlx5_wqe_ctrl_seg *ctrl;
	size_t bytes_copied = 0;
	size_t wqe_length;
	void *p;
	int ds;

	wqe_index = wqe_index & qp->sq.fbc.sz_m1;

	/* read the control segment first */
	p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
	ctrl = p;
	ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
	wqe_length = ds * MLX5_WQE_DS_UNITS;

	/* read rest of WQE if it spreads over more than one stride */
	while (bytes_copied < wqe_length) {
		size_t copy_length =
			min_t(size_t, buflen - bytes_copied, MLX5_SEND_WQE_BB);

		memcpy(buffer + bytes_copied, p, copy_length);
		bytes_copied += copy_length;

		wqe_index = (wqe_index + 1) & qp->sq.fbc.sz_m1;
		p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
	}
	*bc = bytes_copied;
	return 0;
}
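
/*
 * A send WQE may be larger than one basic block and can therefore wrap
 * around the end of a user-mapped SQ; the helper below reads the control
 * segment first, derives the real WQE length from its ds field, and issues
 * a second copy starting at index 0 when the WQE wraps.
 */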
static int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
				    void *buffer, size_t buflen, size_t *bc)
{
	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
	struct ib_umem *umem = base->ubuffer.umem;
	struct mlx5_ib_wq *wq = &qp->sq;
	struct mlx5_wqe_ctrl_seg *ctrl;
	size_t bytes_copied;
	size_t bytes_copied2;
	size_t wqe_length;
	int ret;
	int ds;

	/* at first read as much as possible */
	ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
					   wq->offset, wq->wqe_cnt,
					   wq->wqe_shift, buflen,
					   &bytes_copied);
	if (ret)
		return ret;

	/* we need at least control segment size to proceed */
	if (bytes_copied < sizeof(*ctrl))
		return -EINVAL;

	ctrl = buffer;
	ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
	wqe_length = ds * MLX5_WQE_DS_UNITS;

	/* if we copied enough then we are done */
	if (bytes_copied >= wqe_length) {
		*bc = bytes_copied;
		return 0;
	}

	/* otherwise this is a wrapped-around WQE,
	 * so read the remaining bytes starting
	 * from wqe_index 0
	 */
	ret = mlx5_ib_read_user_wqe_common(umem, buffer + bytes_copied,
					   buflen - bytes_copied, 0, wq->offset,
					   wq->wqe_cnt, wq->wqe_shift,
					   wqe_length - bytes_copied,
					   &bytes_copied2);
	if (ret)
		return ret;

	*bc = bytes_copied + bytes_copied2;
	return 0;
}
int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
			size_t buflen, size_t *bc)
{
	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
	struct ib_umem *umem = base->ubuffer.umem;

	if (buflen < sizeof(struct mlx5_wqe_ctrl_seg))
		return -EINVAL;

	if (!umem)
		return mlx5_ib_read_kernel_wqe_sq(qp, wqe_index, buffer,
						  buflen, bc);

	return mlx5_ib_read_user_wqe_sq(qp, wqe_index, buffer, buflen, bc);
}
static int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index,
				    void *buffer, size_t buflen, size_t *bc)
{
	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
	struct ib_umem *umem = base->ubuffer.umem;
	struct mlx5_ib_wq *wq = &qp->rq;
	size_t bytes_copied;
	int ret;

	ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
					   wq->offset, wq->wqe_cnt,
					   wq->wqe_shift, buflen,
					   &bytes_copied);
	if (ret)
		return ret;

	*bc = bytes_copied;
	return 0;
}
int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
			size_t buflen, size_t *bc)
{
	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
	struct ib_umem *umem = base->ubuffer.umem;
	struct mlx5_ib_wq *wq = &qp->rq;
	size_t wqe_size = 1 << wq->wqe_shift;

	if (buflen < wqe_size)
		return -EINVAL;

	if (!umem)
		return -EOPNOTSUPP;

	return mlx5_ib_read_user_wqe_rq(qp, wqe_index, buffer, buflen, bc);
}
static int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
				     void *buffer, size_t buflen, size_t *bc)
{
	struct ib_umem *umem = srq->umem;
	size_t bytes_copied;
	int ret;

	ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, 0,
					   srq->msrq.max, srq->msrq.wqe_shift,
					   buflen, &bytes_copied);
	if (ret)
		return ret;

	*bc = bytes_copied;
	return 0;
}
int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
			 size_t buflen, size_t *bc)
{
	struct ib_umem *umem = srq->umem;
	size_t wqe_size = 1 << srq->msrq.wqe_shift;

	if (buflen < wqe_size)
		return -EINVAL;

	if (!umem)
		return -EOPNOTSUPP;

	return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
}
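
/*
 * Firmware reports asynchronous QP events (path migration, fatal errors,
 * last-WQE-reached, etc.). The handler below translates the mlx5 event
 * code into the corresponding ib_event and forwards it to the consumer's
 * event_handler, if one is registered.
 */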
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
{
	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
	struct ib_event event;

	if (type == MLX5_EVENT_TYPE_PATH_MIG) {
		/* This event is only valid for trans_qps */
		to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
	}

	if (ibqp->event_handler) {
		event.device     = ibqp->device;
		event.element.qp = ibqp;
		switch (type) {
		case MLX5_EVENT_TYPE_PATH_MIG:
			event.event = IB_EVENT_PATH_MIG;
			break;
		case MLX5_EVENT_TYPE_COMM_EST:
			event.event = IB_EVENT_COMM_EST;
			break;
		case MLX5_EVENT_TYPE_SQ_DRAINED:
			event.event = IB_EVENT_SQ_DRAINED;
			break;
		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
			break;
		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
			event.event = IB_EVENT_QP_FATAL;
			break;
		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
			event.event = IB_EVENT_PATH_MIG_ERR;
			break;
		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			event.event = IB_EVENT_QP_REQ_ERR;
			break;
		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
			event.event = IB_EVENT_QP_ACCESS_ERR;
			break;
		default:
			pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
			return;
		}

		ibqp->event_handler(&event, ibqp->qp_context);
	}
}
static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
		       int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
{
	int wqe_size;
	int wq_size;

	/* Sanity check RQ size before proceeding */
	if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)))
		return -EINVAL;

	if (!has_rq) {
		qp->rq.max_gs = 0;
		qp->rq.wqe_cnt = 0;
		qp->rq.wqe_shift = 0;
		cap->max_recv_wr = 0;
		cap->max_recv_sge = 0;
	} else {
		int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE);

		if (ucmd) {
			qp->rq.wqe_cnt = ucmd->rq_wqe_count;
			if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
				return -EINVAL;
			qp->rq.wqe_shift = ucmd->rq_wqe_shift;
			if ((1 << qp->rq.wqe_shift) /
				    sizeof(struct mlx5_wqe_data_seg) <
			    wq_sig)
				return -EINVAL;
			qp->rq.max_gs =
				(1 << qp->rq.wqe_shift) /
					sizeof(struct mlx5_wqe_data_seg) -
				wq_sig;
			qp->rq.max_post = qp->rq.wqe_cnt;
		} else {
			wqe_size =
				wq_sig ? sizeof(struct mlx5_wqe_signature_seg) :
					 0;
			wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
			wqe_size = roundup_pow_of_two(wqe_size);
			wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
			wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
			qp->rq.wqe_cnt = wq_size / wqe_size;
			if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) {
				mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
					    wqe_size,
					    MLX5_CAP_GEN(dev->mdev,
							 max_wqe_sz_rq));
				return -EINVAL;
			}
			qp->rq.wqe_shift = ilog2(wqe_size);
			qp->rq.max_gs =
				(1 << qp->rq.wqe_shift) /
					sizeof(struct mlx5_wqe_data_seg) -
				wq_sig;
			qp->rq.max_post = qp->rq.wqe_cnt;
		}
	}

	return 0;
}
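
/*
 * sq_overhead() returns the fixed, per-WQE overhead (control, remote
 * address, atomic/UMR or datagram segments) that a send WQE of the given
 * QP type carries before any scatter/gather or inline data is added.
 */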
static int sq_overhead(struct ib_qp_init_attr *attr)
{
	int size = 0;

	switch (attr->qp_type) {
	case IB_QPT_XRC_INI:
		size += sizeof(struct mlx5_wqe_xrc_seg);
		fallthrough;
	case IB_QPT_RC:
		size += sizeof(struct mlx5_wqe_ctrl_seg) +
			max(sizeof(struct mlx5_wqe_atomic_seg) +
			    sizeof(struct mlx5_wqe_raddr_seg),
			    sizeof(struct mlx5_wqe_umr_ctrl_seg) +
			    sizeof(struct mlx5_mkey_seg) +
			    MLX5_IB_SQ_UMR_INLINE_THRESHOLD /
			    MLX5_IB_UMR_OCTOWORD);
		break;

	case IB_QPT_UC:
		size += sizeof(struct mlx5_wqe_ctrl_seg) +
			max(sizeof(struct mlx5_wqe_raddr_seg),
			    sizeof(struct mlx5_wqe_umr_ctrl_seg) +
			    sizeof(struct mlx5_mkey_seg));
		break;

	case IB_QPT_UD:
		if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
			size += sizeof(struct mlx5_wqe_eth_pad) +
				sizeof(struct mlx5_wqe_eth_seg);
		fallthrough;
	case IB_QPT_SMI:
	case MLX5_IB_QPT_HW_GSI:
		size += sizeof(struct mlx5_wqe_ctrl_seg) +
			sizeof(struct mlx5_wqe_datagram_seg);
		break;

	case MLX5_IB_QPT_REG_UMR:
		size += sizeof(struct mlx5_wqe_ctrl_seg) +
			sizeof(struct mlx5_wqe_umr_ctrl_seg) +
			sizeof(struct mlx5_mkey_seg);
		break;

	default:
		return -EINVAL;
	}

	return size;
}
static int calc_send_wqe(struct ib_qp_init_attr *attr)
{
	int inl_size = 0;
	int size;

	size = sq_overhead(attr);
	if (size < 0)
		return size;

	if (attr->cap.max_inline_data) {
		inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
			attr->cap.max_inline_data;
	}

	size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
	if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
	    ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
		return MLX5_SIG_WQE_SIZE;

	return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
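
/*
 * Given the chosen send WQE size, derive how many scatter/gather entries
 * fit once the per-transport overhead is subtracted; RC and XRC initiator
 * WQEs are additionally capped at 512 bytes for this calculation.
 */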
static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
{
	int max_sge;

	if (attr->qp_type == IB_QPT_RC)
		max_sge = (min_t(int, wqe_size, 512) -
			   sizeof(struct mlx5_wqe_ctrl_seg) -
			   sizeof(struct mlx5_wqe_raddr_seg)) /
			sizeof(struct mlx5_wqe_data_seg);
	else if (attr->qp_type == IB_QPT_XRC_INI)
		max_sge = (min_t(int, wqe_size, 512) -
			   sizeof(struct mlx5_wqe_ctrl_seg) -
			   sizeof(struct mlx5_wqe_xrc_seg) -
			   sizeof(struct mlx5_wqe_raddr_seg)) /
			sizeof(struct mlx5_wqe_data_seg);
	else
		max_sge = (wqe_size - sq_overhead(attr)) /
			sizeof(struct mlx5_wqe_data_seg);

	return min_t(int, max_sge, wqe_size - sq_overhead(attr) /
		     sizeof(struct mlx5_wqe_data_seg));
}
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
			struct mlx5_ib_qp *qp)
{
	int wqe_size;
	int wq_size;

	if (!attr->cap.max_send_wr)
		return 0;

	wqe_size = calc_send_wqe(attr);
	mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
	if (wqe_size < 0)
		return wqe_size;

	if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
		mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
			    wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
		return -EINVAL;
	}

	qp->max_inline_data = wqe_size - sq_overhead(attr) -
			      sizeof(struct mlx5_wqe_inline_seg);
	attr->cap.max_inline_data = qp->max_inline_data;

	wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
	qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
	if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
		mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n",
			    attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB,
			    qp->sq.wqe_cnt,
			    1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
		return -ENOMEM;
	}
	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
	qp->sq.max_gs = get_send_sge(attr, wqe_size);
	if (qp->sq.max_gs < attr->cap.max_send_sge)
		return -ENOMEM;

	attr->cap.max_send_sge = qp->sq.max_gs;
	qp->sq.max_post = wq_size / wqe_size;
	attr->cap.max_send_wr = qp->sq.max_post;

	return 0;
}
static int set_user_buf_size(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_qp *qp,
			     struct mlx5_ib_create_qp *ucmd,
			     struct mlx5_ib_qp_base *base,
			     struct ib_qp_init_attr *attr)
{
	int desc_sz = 1 << qp->sq.wqe_shift;

	if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
		mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
			     desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
		return -EINVAL;
	}

	if (ucmd->sq_wqe_count && !is_power_of_2(ucmd->sq_wqe_count)) {
		mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
			     ucmd->sq_wqe_count);
		return -EINVAL;
	}

	qp->sq.wqe_cnt = ucmd->sq_wqe_count;

	if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
		mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
			     qp->sq.wqe_cnt,
			     1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
		return -EINVAL;
	}

	if (attr->qp_type == IB_QPT_RAW_PACKET ||
	    qp->flags & IB_QP_CREATE_SOURCE_QPN) {
		base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
		qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
	} else {
		base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
					 (qp->sq.wqe_cnt << 6);
	}

	return 0;
}
static int qp_has_rq(struct ib_qp_init_attr *attr)
{
	if (attr->qp_type == IB_QPT_XRC_INI ||
	    attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
	    attr->qp_type == MLX5_IB_QPT_REG_UMR ||
	    !attr->cap.max_recv_wr)
		return 0;

	return 1;
}
enum {
	/* This is the first blue flame register in the array of bfregs assigned
	 * to a process. Since we do not use it for blue flame but rather
	 * regular 64 bit doorbells, we do not need a lock for maintaining
	 * "odd/even" order
	 */
	NUM_NON_BLUE_FLAME_BFREGS = 1,
};
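
/*
 * The helpers below manage the blue flame register (bfreg) space of a user
 * context: index 0 is reserved for regular doorbells, "medium" class
 * bfregs may be shared between QPs (the least-used one is picked), and
 * "high" class bfregs are handed out exclusively while a free slot exists.
 */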
static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
{
	return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
}
static int num_med_bfreg(struct mlx5_ib_dev *dev,
			 struct mlx5_bfreg_info *bfregi)
{
	int n;

	n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
	    NUM_NON_BLUE_FLAME_BFREGS;

	return n >= 0 ? n : 0;
}
static int first_med_bfreg(struct mlx5_ib_dev *dev,
			   struct mlx5_bfreg_info *bfregi)
{
	return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
}
static int first_hi_bfreg(struct mlx5_ib_dev *dev,
			  struct mlx5_bfreg_info *bfregi)
{
	int med;

	med = num_med_bfreg(dev, bfregi);
	return ++med;
}
static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
				  struct mlx5_bfreg_info *bfregi)
{
	int i;

	for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) {
		if (!bfregi->count[i]) {
			bfregi->count[i]++;
			return i;
		}
	}

	return -ENOMEM;
}
static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
				 struct mlx5_bfreg_info *bfregi)
{
	int minidx = first_med_bfreg(dev, bfregi);
	int i;

	if (minidx < 0)
		return minidx;

	for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
		if (bfregi->count[i] < bfregi->count[minidx])
			minidx = i;
		if (!bfregi->count[minidx])
			break;
	}

	bfregi->count[minidx]++;

	return minidx;
}
static int alloc_bfreg(struct mlx5_ib_dev *dev,
		       struct mlx5_bfreg_info *bfregi)
{
	int bfregn = -ENOMEM;

	if (bfregi->lib_uar_dyn)
		return -EINVAL;

	mutex_lock(&bfregi->lock);
	if (bfregi->ver >= 2) {
		bfregn = alloc_high_class_bfreg(dev, bfregi);
		if (bfregn < 0)
			bfregn = alloc_med_class_bfreg(dev, bfregi);
	}

	if (bfregn < 0) {
		BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
		bfregn = 0;
		bfregi->count[bfregn]++;
	}
	mutex_unlock(&bfregi->lock);

	return bfregn;
}
void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
{
	mutex_lock(&bfregi->lock);
	bfregi->count[bfregn]--;
	mutex_unlock(&bfregi->lock);
}
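
/*
 * Translation tables between the generic IB verbs enums and the mlx5
 * hardware encodings used in QP context commands.
 */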
static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
{
	switch (state) {
	case IB_QPS_RESET:	return MLX5_QP_STATE_RST;
	case IB_QPS_INIT:	return MLX5_QP_STATE_INIT;
	case IB_QPS_RTR:	return MLX5_QP_STATE_RTR;
	case IB_QPS_RTS:	return MLX5_QP_STATE_RTS;
	case IB_QPS_SQD:	return MLX5_QP_STATE_SQD;
	case IB_QPS_SQE:	return MLX5_QP_STATE_SQER;
	case IB_QPS_ERR:	return MLX5_QP_STATE_ERR;
	default:		return -1;
	}
}
static int to_mlx5_st(enum ib_qp_type type)
{
	switch (type) {
	case IB_QPT_RC:			return MLX5_QP_ST_RC;
	case IB_QPT_UC:			return MLX5_QP_ST_UC;
	case IB_QPT_UD:			return MLX5_QP_ST_UD;
	case MLX5_IB_QPT_REG_UMR:	return MLX5_QP_ST_REG_UMR;
	case IB_QPT_XRC_INI:
	case IB_QPT_XRC_TGT:		return MLX5_QP_ST_XRC;
	case IB_QPT_SMI:		return MLX5_QP_ST_QP0;
	case MLX5_IB_QPT_HW_GSI:	return MLX5_QP_ST_QP1;
	case MLX5_IB_QPT_DCI:		return MLX5_QP_ST_DCI;
	case IB_QPT_RAW_PACKET:		return MLX5_QP_ST_RAW_ETHERTYPE;
	default:			return -EINVAL;
	}
}
static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
			     struct mlx5_ib_cq *recv_cq);
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
			       struct mlx5_ib_cq *recv_cq);
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
			struct mlx5_bfreg_info *bfregi, u32 bfregn,
			bool dyn_bfreg)
{
	unsigned int bfregs_per_sys_page;
	u32 index_of_sys_page;
	u32 offset;

	if (bfregi->lib_uar_dyn)
		return -EINVAL;

	bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
				MLX5_NON_FP_BFREGS_PER_UAR;
	index_of_sys_page = bfregn / bfregs_per_sys_page;

	if (dyn_bfreg) {
		index_of_sys_page += bfregi->num_static_sys_pages;

		if (index_of_sys_page >= bfregi->num_sys_pages)
			return -EINVAL;

		if (bfregn > bfregi->num_dyn_bfregs ||
		    bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
			mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
			return -EINVAL;
		}
	}

	offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
	return bfregi->sys_pages[index_of_sys_page] + offset;
}
static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
			    struct mlx5_ib_rwq *rwq, struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *context =
		rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
					  ibucontext);

	if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
		atomic_dec(&dev->delay_drop.rqs_cnt);

	mlx5_ib_db_unmap_user(context, &rwq->db);
	ib_umem_release(rwq->umem);
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
			  struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
			  struct mlx5_ib_create_wq *ucmd)
{
	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);
	unsigned long page_size = 0;
	u32 offset = 0;
	int err;

	rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0);
	if (IS_ERR(rwq->umem)) {
		mlx5_ib_dbg(dev, "umem_get failed\n");
		err = PTR_ERR(rwq->umem);
		return err;
	}

	page_size = mlx5_umem_find_best_quantized_pgoff(
		rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
		page_offset, 64, &rwq->rq_page_offset);
	if (!page_size) {
		mlx5_ib_warn(dev, "bad offset\n");
		err = -EINVAL;
		goto err_umem;
	}

	rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size);
	rwq->page_shift = order_base_2(page_size);
	rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);

	mlx5_ib_dbg(
		dev,
		"addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n",
		(unsigned long long)ucmd->buf_addr, rwq->buf_size,
		ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
		offset);

	err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db);
	if (err) {
		mlx5_ib_dbg(dev, "map failed\n");
		goto err_umem;
	}

	return 0;

err_umem:
	ib_umem_release(rwq->umem);
	return err;
}
static int adjust_bfregn(struct mlx5_ib_dev *dev,
			 struct mlx5_bfreg_info *bfregi, int bfregn)
{
	return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
				bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
}
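
/*
 * _create_user_qp() assembles the CREATE_QP command for a userspace QP:
 * it picks a UAR/bfreg (unless the user supplied one), validates the
 * user-provided buffer sizes, pins the WQ buffer with ib_umem_get(),
 * fills the PAS array and the qpc page/UAR fields, and maps the user
 * doorbell page.
 */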
static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
			   struct mlx5_ib_qp *qp, struct ib_udata *udata,
			   struct ib_qp_init_attr *attr, u32 **in,
			   struct mlx5_ib_create_qp_resp *resp, int *inlen,
			   struct mlx5_ib_qp_base *base,
			   struct mlx5_ib_create_qp *ucmd)
{
	struct mlx5_ib_ucontext *context;
	struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
	unsigned int page_offset_quantized = 0;
	unsigned long page_size = 0;
	int uar_index = 0;
	int bfregn;
	int ncont = 0;
	__be64 *pas;
	void *qpc;
	int err;
	u16 uid;
	u32 uar_flags;

	context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
					    ibucontext);
	uar_flags = qp->flags_en &
		    (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX);
	switch (uar_flags) {
	case MLX5_QP_FLAG_UAR_PAGE_INDEX:
		uar_index = ucmd->bfreg_index;
		bfregn = MLX5_IB_INVALID_BFREG;
		break;
	case MLX5_QP_FLAG_BFREG_INDEX:
		uar_index = bfregn_to_uar_index(dev, &context->bfregi,
						ucmd->bfreg_index, true);
		if (uar_index < 0)
			return uar_index;
		bfregn = MLX5_IB_INVALID_BFREG;
		break;
	case 0:
		if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
			return -EINVAL;
		bfregn = alloc_bfreg(dev, &context->bfregi);
		if (bfregn < 0)
			return bfregn;
		break;
	default:
		return -EINVAL;
	}

	mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
	if (bfregn != MLX5_IB_INVALID_BFREG)
		uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn,
						false);

	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;

	err = set_user_buf_size(dev, qp, ucmd, base, attr);
	if (err)
		goto err_bfreg;

	if (ucmd->buf_addr && ubuffer->buf_size) {
		ubuffer->buf_addr = ucmd->buf_addr;
		ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
					    ubuffer->buf_size, 0);
		if (IS_ERR(ubuffer->umem)) {
			err = PTR_ERR(ubuffer->umem);
			goto err_bfreg;
		}
		page_size = mlx5_umem_find_best_quantized_pgoff(
			ubuffer->umem, qpc, log_page_size,
			MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
			&page_offset_quantized);
		if (!page_size) {
			err = -EINVAL;
			goto err_umem;
		}
		ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size);
	} else {
		ubuffer->umem = NULL;
	}

	*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
	*in = kvzalloc(*inlen, GFP_KERNEL);
	if (!*in) {
		err = -ENOMEM;
		goto err_umem;
	}

	uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
	MLX5_SET(create_qp_in, *in, uid, uid);
	qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
	if (ubuffer->umem)
		mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0);
	MLX5_SET(qpc, qpc, log_page_size,
		 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);

	MLX5_SET(qpc, qpc, uar_page, uar_index);
	if (bfregn != MLX5_IB_INVALID_BFREG)
		resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
	else
		resp->bfreg_index = MLX5_IB_INVALID_BFREG;
	qp->bfregn = bfregn;

	err = mlx5_ib_db_map_user(context, udata, ucmd->db_addr, &qp->db);
	if (err) {
		mlx5_ib_dbg(dev, "map failed\n");
		goto err_free;
	}

	return 0;

err_free:
	kvfree(*in);

err_umem:
	ib_umem_release(ubuffer->umem);

err_bfreg:
	if (bfregn != MLX5_IB_INVALID_BFREG)
		mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn);
	return err;
}
static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
		       struct mlx5_ib_qp_base *base, struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	if (udata) {
		/* User QP */
		mlx5_ib_db_unmap_user(context, &qp->db);
		ib_umem_release(base->ubuffer.umem);

		/*
		 * Free only the BFREGs which are handled by the kernel.
		 * BFREGs of UARs allocated dynamically are handled by user.
		 */
		if (qp->bfregn != MLX5_IB_INVALID_BFREG)
			mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
		return;
	}

	/* Kernel QP */
	kvfree(qp->sq.wqe_head);
	kvfree(qp->sq.w_list);
	kvfree(qp->sq.wrid);
	kvfree(qp->sq.wr_data);
	kvfree(qp->rq.wrid);
	if (qp->db.db)
		mlx5_db_free(dev->mdev, &qp->db);
	if (qp->buf.frags)
		mlx5_frag_buf_free(dev->mdev, &qp->buf);
}
static int _create_kernel_qp(struct mlx5_ib_dev *dev,
			     struct ib_qp_init_attr *init_attr,
			     struct mlx5_ib_qp *qp, u32 **in, int *inlen,
			     struct mlx5_ib_qp_base *base)
{
	int uar_index;
	void *qpc;
	int err;

	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
		qp->bf.bfreg = &dev->fp_bfreg;
	else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
		qp->bf.bfreg = &dev->wc_bfreg;
	else
		qp->bf.bfreg = &dev->bfreg;

	/* We need to divide by two since each register is comprised of
	 * two buffers of identical size, namely odd and even
	 */
	qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
	uar_index = qp->bf.bfreg->index;

	err = calc_sq_size(dev, init_attr, qp);
	if (err < 0) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);

	err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
				       &qp->buf, dev->mdev->priv.numa_node);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	if (qp->rq.wqe_cnt)
		mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
			      ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);

	if (qp->sq.wqe_cnt) {
		int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) /
					MLX5_SEND_WQE_BB;
		mlx5_init_fbc_offset(qp->buf.frags +
				     (qp->sq.offset / PAGE_SIZE),
				     ilog2(MLX5_SEND_WQE_BB),
				     ilog2(qp->sq.wqe_cnt),
				     sq_strides_offset, &qp->sq.fbc);

		qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
	}

	*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
	*in = kvzalloc(*inlen, GFP_KERNEL);
	if (!*in) {
		err = -ENOMEM;
		goto err_buf;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
	MLX5_SET(qpc, qpc, uar_page, uar_index);
	MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Set "fast registration enabled" for all kernel QPs */
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);

	if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
		MLX5_SET(qpc, qpc, deth_sqpn, 1);

	mlx5_fill_page_frag_array(&qp->buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 *in, pas));

	err = mlx5_db_alloc(dev->mdev, &qp->db);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto err_free;
	}

	qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt,
				     sizeof(*qp->sq.wrid), GFP_KERNEL);
	qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt,
					sizeof(*qp->sq.wr_data), GFP_KERNEL);
	qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt,
				     sizeof(*qp->rq.wrid), GFP_KERNEL);
	qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt,
				       sizeof(*qp->sq.w_list), GFP_KERNEL);
	qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt,
					 sizeof(*qp->sq.wqe_head), GFP_KERNEL);

	if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
	    !qp->sq.w_list || !qp->sq.wqe_head) {
		err = -ENOMEM;
		goto err_wrid;
	}

	return 0;

err_wrid:
	kvfree(qp->sq.wqe_head);
	kvfree(qp->sq.w_list);
	kvfree(qp->sq.wrid);
	kvfree(qp->sq.wr_data);
	kvfree(qp->rq.wrid);
	mlx5_db_free(dev->mdev, &qp->db);

err_free:
	kvfree(*in);

err_buf:
	mlx5_frag_buf_free(dev->mdev, &qp->buf);
	return err;
}
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
{
	if (attr->srq || (qp->type == IB_QPT_XRC_TGT) ||
	    (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI))
		return MLX5_SRQ_RQ;
	else if (!qp->has_rq)
		return MLX5_ZERO_LEN_RQ;

	return MLX5_NON_ZERO_RQ;
}
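
/*
 * Raw packet (Ethernet) QPs are not created as a single firmware QP
 * object; the send side is a TIS + SQ pair and the receive side is an
 * RQ + TIR pair. The helpers below create and destroy those components.
 */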
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_qp *qp,
				    struct mlx5_ib_sq *sq, u32 tdn,
				    struct ib_pd *pd)
{
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
	MLX5_SET(tisc, tisc, transport_domain, tdn);
	if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
		MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);

	return mlx5_core_create_tis(dev->mdev, in, &sq->tisn);
}
static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
				      struct mlx5_ib_sq *sq, struct ib_pd *pd)
{
	mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
}
static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq)
{
	if (sq->flow_rule)
		mlx5_del_flow_rules(sq->flow_rule);
	sq->flow_rule = NULL;
}
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
				   struct ib_udata *udata,
				   struct mlx5_ib_sq *sq, void *qpin,
				   struct ib_pd *pd)
{
	struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
	__be64 *pas;
	void *in;
	void *sqc;
	void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
	void *wq;
	int inlen;
	int err;
	unsigned int page_offset_quantized;
	unsigned long page_size;

	sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
				       ubuffer->buf_size, 0);
	if (IS_ERR(sq->ubuffer.umem))
		return PTR_ERR(sq->ubuffer.umem);
	page_size = mlx5_umem_find_best_quantized_pgoff(
		ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
		page_offset, 64, &page_offset_quantized);
	if (!page_size) {
		err = -EINVAL;
		goto err_umem;
	}

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
		sizeof(u64) *
			ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_umem;
	}

	MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
	if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
		MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
	MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
	if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
	    MLX5_CAP_ETH(dev->mdev, swp))
		MLX5_SET(sqc, sqc, allow_swp, 1);

	wq = MLX5_ADDR_OF(sqc, sqc, wq);
	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
	MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
	MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
	MLX5_SET(wq, wq, log_wq_pg_sz,
		 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(wq, wq, page_offset, page_offset_quantized);

	pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
	mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0);

	err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);

	kvfree(in);

	if (err)
		goto err_umem;

	return 0;

err_umem:
	ib_umem_release(sq->ubuffer.umem);
	sq->ubuffer.umem = NULL;

	return err;
}
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
				     struct mlx5_ib_sq *sq)
{
	destroy_flow_rule_vport_sq(sq);
	mlx5_core_destroy_sq_tracked(dev, &sq->base.mqp);
	ib_umem_release(sq->ubuffer.umem);
}
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
				   struct mlx5_ib_rq *rq, void *qpin,
				   struct ib_pd *pd)
{
	struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
	__be64 *pas;
	void *in;
	void *rqc;
	void *wq;
	void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
	struct ib_umem *umem = rq->base.ubuffer.umem;
	unsigned int page_offset_quantized;
	unsigned long page_size = 0;
	int inlen;
	int err;

	page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
							MLX5_ADAPTER_PAGE_SHIFT,
							page_offset, 64,
							&page_offset_quantized);
	if (!page_size)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
		sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
	if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
		MLX5_SET(rqc, rqc, vsd, 1);
	MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
	MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
	MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));

	if (mqp->flags & IB_QP_CREATE_SCATTER_FCS)
		MLX5_SET(rqc, rqc, scatter_fcs, 1);

	wq = MLX5_ADDR_OF(rqc, rqc, wq);
	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING)
		MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, page_offset, page_offset_quantized);
	MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
	MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
	MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
	MLX5_SET(wq, wq, log_wq_pg_sz,
		 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));

	pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
	mlx5_ib_populate_pas(umem, page_size, pas, 0);

	err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);

	kvfree(in);

	return err;
}
static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
				     struct mlx5_ib_rq *rq)
{
	mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp);
}
static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
				      struct mlx5_ib_rq *rq,
				      u32 qp_flags_en,
				      struct ib_pd *pd)
{
	if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
			   MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
		mlx5_ib_disable_lb(dev, false, true);
	mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
}
static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_rq *rq, u32 tdn,
				    u32 *qp_flags_en, struct ib_pd *pd,
				    u32 *out)
{
	u8 lb_flag = 0;
	u32 *in;
	void *tirc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
	MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
	MLX5_SET(tirc, tirc, transport_domain, tdn);
	if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
		MLX5_SET(tirc, tirc, tunneled_offload_en, 1);

	if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;

	if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;

	if (dev->is_rep) {
		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
		*qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
	}

	MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
	err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
	rq->tirn = MLX5_GET(create_tir_out, out, tirn);
	if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
		err = mlx5_ib_enable_lb(dev, false, true);

		if (err)
			destroy_raw_packet_qp_tir(dev, rq, 0, pd);
	}
	kvfree(in);

	return err;
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
				u32 *in, size_t inlen, struct ib_pd *pd,
				struct ib_udata *udata,
				struct mlx5_ib_create_qp_resp *resp)
{
	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
	struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);
	int err;
	u32 tdn = mucontext->tdn;
	u16 uid = to_mpd(pd)->uid;
	u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};

	if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt)
		return -EINVAL;
	if (qp->sq.wqe_cnt) {
		err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
		if (err)
			return err;

		err = create_raw_packet_qp_sq(dev, udata, sq, in, pd);
		if (err)
			goto err_destroy_tis;

		if (uid) {
			resp->tisn = sq->tisn;
			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
			resp->sqn = sq->base.mqp.qpn;
			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
		}

		sq->base.container_mibqp = qp;
		sq->base.mqp.event = mlx5_ib_qp_event;
	}

	if (qp->rq.wqe_cnt) {
		rq->base.container_mibqp = qp;

		if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING)
			rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
		if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
			rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
		err = create_raw_packet_qp_rq(dev, rq, in, pd);
		if (err)
			goto err_destroy_sq;

		err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd,
					       out);
		if (err)
			goto err_destroy_rq;

		if (uid) {
			resp->rqn = rq->base.mqp.qpn;
			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
			resp->tirn = rq->tirn;
			resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
			if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
				resp->tir_icm_addr = MLX5_GET(
					create_tir_out, out, icm_address_31_0);
				resp->tir_icm_addr |=
					(u64)MLX5_GET(create_tir_out, out,
						      icm_address_39_32)
					<< 32;
				resp->tir_icm_addr |=
					(u64)MLX5_GET(create_tir_out, out,
						      icm_address_63_40)
					<< 40;
				resp->comp_mask |=
					MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
			}
		}
	}

	qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
						     rq->base.mqp.qpn;
	return 0;

err_destroy_rq:
	destroy_raw_packet_qp_rq(dev, rq);
err_destroy_sq:
	if (!qp->sq.wqe_cnt)
		return err;
	destroy_raw_packet_qp_sq(dev, sq);
err_destroy_tis:
	destroy_raw_packet_qp_tis(dev, sq, pd);

	return err;
}
static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
				  struct mlx5_ib_qp *qp)
{
	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;

	if (qp->rq.wqe_cnt) {
		destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
		destroy_raw_packet_qp_rq(dev, rq);
	}

	if (qp->sq.wqe_cnt) {
		destroy_raw_packet_qp_sq(dev, sq);
		destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
	}
}
static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
				    struct mlx5_ib_raw_packet_qp *raw_packet_qp)
{
	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;

	sq->sq = &qp->sq;
	rq->rq = &qp->rq;
	sq->doorbell = &qp->db;
	rq->doorbell = &qp->db;
}
static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
	if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
			    MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
		mlx5_ib_disable_lb(dev, false, true);

	mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
			     to_mpd(qp->ibqp.pd)->uid);
}
struct mlx5_create_qp_params {
	struct ib_udata *udata;
	void *ucmd;
	struct ib_qp_init_attr *attr;
	u32 uidx;
	struct mlx5_ib_create_qp_resp resp;
};
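
/*
 * An RSS raw QP is implemented purely as an indirect TIR that spreads
 * received packets across the receive WQs of the supplied indirection
 * table; the function below validates the requested hash function and
 * field mask and programs the TIR hash field selectors accordingly.
 */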
static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd,
				 struct mlx5_ib_qp *qp,
				 struct mlx5_create_qp_params *params)
{
	struct ib_qp_init_attr *init_attr = params->attr;
	struct mlx5_ib_create_qp_rss *ucmd = params->ucmd;
	struct ib_udata *udata = params->udata;
	struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);
	int inlen;
	int outlen;
	int err;
	u32 *in;
	u32 *out;
	void *tirc;
	void *hfso;
	u32 selected_fields = 0;
	u32 outer_l4;
	u32 tdn = mucontext->tdn;
	u8 lb_flag = 0;

	if (ucmd->comp_mask) {
		mlx5_ib_dbg(dev, "invalid comp mask\n");
		return -EOPNOTSUPP;
	}

	if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
	    !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
		mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n");
		return -EOPNOTSUPP;
	}

	if (dev->is_rep)
		qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;

	if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;

	if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
		lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;

	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
	outlen = MLX5_ST_SZ_BYTES(create_tir_out);
	in = kvzalloc(inlen + outlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	out = in + MLX5_ST_SZ_DW(create_tir_in);
	MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type,
		 MLX5_TIRC_DISP_TYPE_INDIRECT);
	MLX5_SET(tirc, tirc, indirect_table,
		 init_attr->rwq_ind_tbl->ind_tbl_num);
	MLX5_SET(tirc, tirc, transport_domain, tdn);

	hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);

	if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
		MLX5_SET(tirc, tirc, tunneled_offload_en, 1);

	MLX5_SET(tirc, tirc, self_lb_block, lb_flag);

	if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER)
		hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
	else
		hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);

	switch (ucmd->rx_hash_function) {
	case MLX5_RX_HASH_FUNC_TOEPLITZ:
	{
		void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
		size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);

		if (len != ucmd->rx_key_len) {
			err = -EINVAL;
			goto err;
		}

		MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
		memcpy(rss_key, ucmd->rx_hash_key, len);
		break;
	}
	default:
		err = -EOPNOTSUPP;
		goto err;
	}

	if (!ucmd->rx_hash_fields_mask) {
		/* special case when this TIR serves as steering entry without hashing */
		if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
			goto create_tir;
		err = -EINVAL;
		goto err;
	}

	if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
	     (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
	    ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
	     (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
		err = -EINVAL;
		goto err;
	}

	/* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
	if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
	    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
			 MLX5_L3_PROT_TYPE_IPV4);
	else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
		 (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
			 MLX5_L3_PROT_TYPE_IPV6);

	outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
		    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
			   << 0 |
		   ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
		    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
			   << 1 |
		   (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;

	/* Check that only one l4 protocol is set */
	if (outer_l4 & (outer_l4 - 1)) {
		err = -EINVAL;
		goto err;
	}

	/* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
	if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
	    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
			 MLX5_L4_PROT_TYPE_TCP);
	else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
		 (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
			 MLX5_L4_PROT_TYPE_UDP);

	if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
	    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
		selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;

	if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
	    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
		selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;

	if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
	    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
		selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;

	if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
	    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
		selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;

	if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
		selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;

	MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);

create_tir:
	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
	err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);

	qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn);
	if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
		err = mlx5_ib_enable_lb(dev, false, true);

		if (err)
			mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
					     to_mpd(pd)->uid);
	}

	if (err)
		goto err;

	if (mucontext->devx_uid) {
		params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
		params->resp.tirn = qp->rss_qp.tirn;
		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
		    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
			params->resp.tir_icm_addr =
				MLX5_GET(create_tir_out, out, icm_address_31_0);
			params->resp.tir_icm_addr |=
				(u64)MLX5_GET(create_tir_out, out,
					      icm_address_39_32)
				<< 32;
			params->resp.tir_icm_addr |=
				(u64)MLX5_GET(create_tir_out, out,
					      icm_address_63_40)
				<< 40;
			params->resp.comp_mask |=
				MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
		}
	}

	kvfree(in);
	/* qpn is reserved for that QP */
	qp->trans_qp.base.mqp.qpn = 0;
	return 0;

err:
	kvfree(in);
	return err;
}
static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
					 struct mlx5_ib_qp *qp,
					 struct ib_qp_init_attr *init_attr,
					 void *qpc)
{
	int scqe_sz;
	bool allow_scat_cqe = false;

	allow_scat_cqe = qp->flags_en & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;

	if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
		return;

	scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq);
	if (scqe_sz == 128) {
		MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
		return;
	}

	if (init_attr->qp_type != MLX5_IB_QPT_DCI ||
	    MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe))
		MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
}
static int atomic_size_to_mode(int size_mask)
{
	/* driver does not support atomic_size > 256B
	 * and does not know how to translate bigger sizes
	 */
	int supported_size_mask = size_mask & 0x1ff;
	int log_max_size;

	if (!supported_size_mask)
		return -EOPNOTSUPP;

	log_max_size = __fls(supported_size_mask);

	if (log_max_size > 3)
		return log_max_size;

	return MLX5_ATOMIC_MODE_8B;
}
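
/*
 * Pick the atomic mode for this QP type: extended (sized) atomics are
 * preferred and mapped to a size-based mode via atomic_size_to_mode();
 * otherwise classic IB compare-swap/fetch-add is used when the device
 * supports both operations.
 */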
static int get_atomic_mode(struct mlx5_ib_dev *dev,
			   enum ib_qp_type qp_type)
{
	u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
	u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
	int atomic_mode = -EOPNOTSUPP;
	int atomic_size_mask;

	if (!atomic)
		return -EOPNOTSUPP;

	if (qp_type == MLX5_IB_QPT_DCT)
		atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
	else
		atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);

	if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
	    (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
		atomic_mode = atomic_size_to_mode(atomic_size_mask);

	if (atomic_mode <= 0 &&
	    (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
	     atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
		atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;

	return atomic_mode;
}
static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
			     struct mlx5_create_qp_params *params)
{
	struct mlx5_ib_create_qp *ucmd = params->ucmd;
	struct ib_qp_init_attr *attr = params->attr;
	u32 uidx = params->uidx;
	struct mlx5_ib_resources *devr = &dev->devr;
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_qp_base *base;
	unsigned long flags;
	void *qpc;
	u32 *in;
	int err;

	if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	if (MLX5_CAP_GEN(mdev, ece_support) && ucmd)
		MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn);

	if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
		MLX5_SET(qpc, qpc, block_lb_mc, 1);
	if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
		MLX5_SET(qpc, qpc, cd_master, 1);
	if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
		MLX5_SET(qpc, qpc, cd_slave_send, 1);
	if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
		MLX5_SET(qpc, qpc, cd_slave_receive, 1);

	MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
	MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
	MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
	MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn);
	MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);

	/* 0xffffff means we ask to work with cqe version 0 */
	if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
		MLX5_SET(qpc, qpc, user_index, uidx);

	if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
		MLX5_SET(qpc, qpc, end_padding_mode,
			 MLX5_WQ_END_PAD_MODE_ALIGN);
		/* Special case to clean flag */
		qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
	}

	base = &qp->trans_qp.base;
	err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
	kvfree(in);
	if (err)
		return err;

	base->container_mibqp = qp;
	base->mqp.event = mlx5_ib_qp_event;
	if (MLX5_CAP_GEN(mdev, ece_support))
		params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);

	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
	list_add_tail(&qp->qps_list, &dev->qp_list);
	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);

	qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn;
	return 0;
}
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
			  struct mlx5_ib_qp *qp,
			  struct mlx5_create_qp_params *params)
{
	struct ib_qp_init_attr *init_attr = params->attr;
	struct mlx5_ib_create_qp *ucmd = params->ucmd;
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	struct ib_udata *udata = params->udata;
	u32 uidx = params->uidx;
	struct mlx5_ib_resources *devr = &dev->devr;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_cq *send_cq;
	struct mlx5_ib_cq *recv_cq;
	unsigned long flags;
	struct mlx5_ib_qp_base *base;
	int mlx5_st;
	void *qpc;
	u32 *in;
	int err;

	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);

	mlx5_st = to_mlx5_st(qp->type);
	if (mlx5_st < 0)
		return -EINVAL;

	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;

	if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
		qp->underlay_qpn = init_attr->source_qpn;

	base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
		qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
		       &qp->raw_packet_qp.rq.base :
		       &qp->trans_qp.base;

	qp->has_rq = qp_has_rq(init_attr);
	err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
	    ucmd->rq_wqe_count != qp->rq.wqe_cnt)
		return -EINVAL;

	if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
		return -EINVAL;

	err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
			      &inlen, base, ucmd);
	if (err)
		return err;

	if (is_sqp(init_attr->qp_type))
		qp->port = init_attr->port_num;

	if (MLX5_CAP_GEN(mdev, ece_support))
		MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, st, mlx5_st);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);

	if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
		MLX5_SET(qpc, qpc, wq_signature, 1);

	if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
		MLX5_SET(qpc, qpc, block_lb_mc, 1);

	if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
		MLX5_SET(qpc, qpc, cd_master, 1);
	if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
		MLX5_SET(qpc, qpc, cd_slave_send, 1);
	if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
		MLX5_SET(qpc, qpc, cd_slave_receive, 1);
	if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)
		MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
	if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
	    (init_attr->qp_type == IB_QPT_RC ||
	     init_attr->qp_type == IB_QPT_UC)) {
		int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);

		MLX5_SET(qpc, qpc, cs_res,
			 rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
					  MLX5_RES_SCAT_DATA32_CQE);
	}
	if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
	    (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
		configure_requester_scat_cqe(dev, qp, init_attr, qpc);

	if (qp->rq.wqe_cnt) {
		MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
		MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
	}

	MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));

	if (qp->sq.wqe_cnt) {
		MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
	} else {
		MLX5_SET(qpc, qpc, no_sq, 1);
		if (init_attr->srq &&
		    init_attr->srq->srq_type == IB_SRQT_TM)
			MLX5_SET(qpc, qpc, offload_type,
				 MLX5_QPC_OFFLOAD_TYPE_RNDV);
	}

	/* Set default resources */
	switch (init_attr->qp_type) {
	case IB_QPT_XRC_INI:
		MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
		MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
		MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
		break;
	default:
		if (init_attr->srq) {
			MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
			MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
		} else {
			MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
			MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
		}
	}

	if (init_attr->send_cq)
		MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);

	if (init_attr->recv_cq)
		MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);

	MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);

	/* 0xffffff means we ask to work with cqe version 0 */
	if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
		MLX5_SET(qpc, qpc, user_index, uidx);

	if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING &&
	    init_attr->qp_type != IB_QPT_RAW_PACKET) {
		MLX5_SET(qpc, qpc, end_padding_mode,
			 MLX5_WQ_END_PAD_MODE_ALIGN);
		/* Special case to clean flag */
		qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
	}

	if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
	    qp->flags & IB_QP_CREATE_SOURCE_QPN) {
		qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
		raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
		err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
					   &params->resp);
	} else
		err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);

	kvfree(in);
	if (err)
		goto err_create;

	base->container_mibqp = qp;
	base->mqp.event = mlx5_ib_qp_event;
	if (MLX5_CAP_GEN(mdev, ece_support))
		params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);

	get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
		&send_cq, &recv_cq);
	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
	mlx5_ib_lock_cqs(send_cq, recv_cq);
	/* Maintain device to QPs access, needed for further handling via reset
	 * flow
	 */
	list_add_tail(&qp->qps_list, &dev->qp_list);
	/* Maintain CQ to QPs access, needed for further handling via reset flow
	 */
	if (send_cq)
		list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
	if (recv_cq)
		list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
	mlx5_ib_unlock_cqs(send_cq, recv_cq);
	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);

	return 0;

err_create:
	destroy_qp(dev, qp, base, udata);
	return err;
}
static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
			    struct mlx5_ib_qp *qp,
			    struct mlx5_create_qp_params *params)
{
	struct ib_qp_init_attr *attr = params->attr;
	u32 uidx = params->uidx;
	struct mlx5_ib_resources *devr = &dev->devr;
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_cq *send_cq;
	struct mlx5_ib_cq *recv_cq;
	unsigned long flags;
	struct mlx5_ib_qp_base *base;
	int mlx5_st;
	void *qpc;
	u32 *in;
	int err;

	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);

	mlx5_st = to_mlx5_st(qp->type);
	if (mlx5_st < 0)
		return -EINVAL;

	if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;

	base = &qp->trans_qp.base;

	qp->has_rq = qp_has_rq(attr);
	err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base);
	if (err)
		return err;

	if (is_sqp(attr->qp_type))
		qp->port = attr->port_num;

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, st, mlx5_st);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);

	if (attr->qp_type != MLX5_IB_QPT_REG_UMR)
		MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
	else
		MLX5_SET(qpc, qpc, latency_sensitive, 1);

	if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
		MLX5_SET(qpc, qpc, block_lb_mc, 1);

	if (qp->rq.wqe_cnt) {
		MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
		MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
	}

	MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr));

	if (qp->sq.wqe_cnt)
		MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
	else
		MLX5_SET(qpc, qpc, no_sq, 1);

	if (attr->srq) {
		MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
		MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
			 to_msrq(attr->srq)->msrq.srqn);
	} else {
		MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
		MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
			 to_msrq(devr->s1)->msrq.srqn);
	}

	if (attr->send_cq)
		MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn);

	if (attr->recv_cq)
		MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn);

	MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);

	/* 0xffffff means we ask to work with cqe version 0 */
	if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
		MLX5_SET(qpc, qpc, user_index, uidx);

	/* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an ipoib qp */
	if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO)
		MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);

	err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
	kvfree(in);
	if (err)
		goto err_create;

	base->container_mibqp = qp;
	base->mqp.event = mlx5_ib_qp_event;

	get_cqs(qp->type, attr->send_cq, attr->recv_cq,
		&send_cq, &recv_cq);
	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
	mlx5_ib_lock_cqs(send_cq, recv_cq);
	/* Maintain device to QPs access, needed for further handling via reset
	 * flow
	 */
	list_add_tail(&qp->qps_list, &dev->qp_list);
	/* Maintain CQ to QPs access, needed for further handling via reset flow
	 */
	if (send_cq)
		list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
	if (recv_cq)
		list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
	mlx5_ib_unlock_cqs(send_cq, recv_cq);
	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);

	return 0;

err_create:
	destroy_qp(dev, qp, base, NULL);
	return err;
}
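
/*
 * Lock the send and receive CQs in a fixed order (by ascending CQN) so
 * that two QPs sharing the same pair of CQs cannot deadlock; when both
 * CQs are the same object only one spinlock is really taken and the
 * second acquisition is annotated for sparse with __acquire().
 */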
static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
	if (send_cq) {
		if (recv_cq) {
			if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
				spin_lock(&send_cq->lock);
				spin_lock_nested(&recv_cq->lock,
						 SINGLE_DEPTH_NESTING);
			} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
				spin_lock(&send_cq->lock);
				__acquire(&recv_cq->lock);
			} else {
				spin_lock(&recv_cq->lock);
				spin_lock_nested(&send_cq->lock,
						 SINGLE_DEPTH_NESTING);
			}
		} else {
			spin_lock(&send_cq->lock);
			__acquire(&recv_cq->lock);
		}
	} else if (recv_cq) {
		spin_lock(&recv_cq->lock);
		__acquire(&send_cq->lock);
	} else {
		__acquire(&send_cq->lock);
		__acquire(&recv_cq->lock);
	}
}
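
/* Release the CQ locks in the reverse of the order taken above. */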
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
	__releases(&send_cq->lock) __releases(&recv_cq->lock)
{
	if (send_cq) {
		if (recv_cq) {
			if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
				spin_unlock(&recv_cq->lock);
				spin_unlock(&send_cq->lock);
			} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
				__release(&recv_cq->lock);
				spin_unlock(&send_cq->lock);
			} else {
				spin_unlock(&send_cq->lock);
				spin_unlock(&recv_cq->lock);
			}
		} else {
			__release(&recv_cq->lock);
			spin_unlock(&send_cq->lock);
		}
	} else if (recv_cq) {
		__release(&send_cq->lock);
		spin_unlock(&recv_cq->lock);
	} else {
		__release(&recv_cq->lock);
		__release(&send_cq->lock);
	}
}
static void get_cqs(enum ib_qp_type qp_type,
		    struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
		    struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
{
	switch (qp_type) {
	case IB_QPT_XRC_TGT:
		*send_cq = NULL;
		*recv_cq = NULL;
		break;
	case MLX5_IB_QPT_REG_UMR:
	case IB_QPT_XRC_INI:
		*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
		*recv_cq = NULL;
		break;

	case IB_QPT_SMI:
	case MLX5_IB_QPT_HW_GSI:
	case IB_QPT_RC:
	case IB_QPT_UC:
	case IB_QPT_UD:
	case IB_QPT_RAW_PACKET:
		*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
		*recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
		break;
	default:
		*send_cq = NULL;
		*recv_cq = NULL;
		break;
	}
}

static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
				const struct mlx5_modify_raw_qp_param *raw_qp_param,
				u8 lag_tx_affinity);
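
/*
 * Common teardown: move the QP to RESET (via the raw-packet path when
 * needed), unlink it from the device and CQ lists protected by
 * reset_flow_resource_lock, clean any stale CQEs for kernel QPs and
 * finally release the QP resources.
 */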
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
			      struct ib_udata *udata)
{
	struct mlx5_ib_cq *send_cq, *recv_cq;
	struct mlx5_ib_qp_base *base;
	unsigned long flags;
	int err;

	if (qp->is_rss) {
		destroy_rss_raw_qp_tir(dev, qp);
		return;
	}

	base = (qp->type == IB_QPT_RAW_PACKET ||
		qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
		       &qp->raw_packet_qp.rq.base :
		       &qp->trans_qp.base;

	if (qp->state != IB_QPS_RESET) {
		if (qp->type != IB_QPT_RAW_PACKET &&
		    !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
			err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
						  NULL, &base->mqp, NULL);
		} else {
			struct mlx5_modify_raw_qp_param raw_qp_param = {
				.operation = MLX5_CMD_OP_2RST_QP
			};

			err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0);
		}
		if (err)
			mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
				     base->mqp.qpn);
	}

	get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq,
		&recv_cq);

	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
	mlx5_ib_lock_cqs(send_cq, recv_cq);
	/* del from lists under both locks above to protect reset flow paths */
	list_del(&qp->qps_list);
	if (send_cq)
		list_del(&qp->cq_send_list);

	if (recv_cq)
		list_del(&qp->cq_recv_list);

	if (!udata) {
		__mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
				   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
		if (send_cq != recv_cq)
			__mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
					   NULL);
	}
	mlx5_ib_unlock_cqs(send_cq, recv_cq);
	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);

	if (qp->type == IB_QPT_RAW_PACKET ||
	    qp->flags & IB_QP_CREATE_SOURCE_QPN) {
		destroy_raw_packet_qp(dev, qp);
	} else {
		err = mlx5_core_destroy_qp(dev, &base->mqp);
		if (err)
			mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
				     base->mqp.qpn);
	}

	destroy_qp(dev, qp, base, udata);
}
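
/*
 * DCT creation only prepares the create_dct mailbox here; the hardware
 * object itself is created later, on the INIT->RTR transition in
 * mlx5_ib_modify_dct(), so the QP is left in the RESET state.
 */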
static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
		      struct mlx5_ib_qp *qp,
		      struct mlx5_create_qp_params *params)
{
	struct ib_qp_init_attr *attr = params->attr;
	struct mlx5_ib_create_qp *ucmd = params->ucmd;
	u32 uidx = params->uidx;
	void *dctc;

	if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
		return -EOPNOTSUPP;

	qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
	if (!qp->dct.in)
		return -ENOMEM;

	MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
	dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
	MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
	MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
	MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
	MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
	MLX5_SET(dctc, dctc, user_index, uidx);
	if (MLX5_CAP_GEN(dev->mdev, ece_support))
		MLX5_SET(dctc, dctc, ece, ucmd->ece_options);

	if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) {
		int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq);

		if (rcqe_sz == 128)
			MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
	}

	qp->state = IB_QPS_RESET;
	rdma_restrack_no_track(&qp->ibqp.res);
	return 0;
}
static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
			 enum ib_qp_type *type)
{
	if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct))
		goto out;

	switch (attr->qp_type) {
	case IB_QPT_XRC_TGT:
	case IB_QPT_XRC_INI:
		if (!MLX5_CAP_GEN(dev->mdev, xrc))
			goto out;
		fallthrough;
	case IB_QPT_RC:
	case IB_QPT_UC:
	case IB_QPT_SMI:
	case MLX5_IB_QPT_HW_GSI:
	case IB_QPT_DRIVER:
	case IB_QPT_GSI:
		if (dev->profile == &raw_eth_profile)
			goto out;
		fallthrough;
	case IB_QPT_RAW_PACKET:
	case IB_QPT_UD:
	case MLX5_IB_QPT_REG_UMR:
		break;
	default:
		goto out;
	}

	*type = attr->qp_type;
	return 0;

out:
	mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type);
	return -EOPNOTSUPP;
}
static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
			    struct ib_qp_init_attr *attr,
			    struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	if (!udata) {
		/* Kernel create_qp callers */
		if (attr->rwq_ind_tbl)
			return -EOPNOTSUPP;

		switch (attr->qp_type) {
		case IB_QPT_RAW_PACKET:
		case IB_QPT_DRIVER:
			return -EOPNOTSUPP;
		default:
			return 0;
		}
	}

	/* Userspace create_qp callers */
	if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) {
		mlx5_ib_dbg(dev,
			"Raw Packet QP is only supported for CQE version > 0\n");
		return -EINVAL;
	}

	if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) {
		mlx5_ib_dbg(dev,
			    "Wrong QP type %d for the RWQ indirect table\n",
			    attr->qp_type);
		return -EINVAL;
	}

	/*
	 * We don't need to see this warning, it means that kernel code is
	 * missing an ib_pd. Placed here to catch developer's mistakes.
	 */
	WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT,
		  "There is a missing PD pointer assignment\n");
	return 0;
}
static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
				bool cond, struct mlx5_ib_qp *qp)
{
	if (!(*flags & flag))
		return;

	if (cond) {
		qp->flags_en |= flag;
		*flags &= ~flag;
		return;
	}

	switch (flag) {
	case MLX5_QP_FLAG_SCATTER_CQE:
	case MLX5_QP_FLAG_ALLOW_SCATTER_CQE:
		/*
		 * We don't return an error if these flags were provided
		 * but mlx5 doesn't have the right capability.
		 */
		*flags &= ~(MLX5_QP_FLAG_SCATTER_CQE |
			    MLX5_QP_FLAG_ALLOW_SCATTER_CQE);
		return;
	default:
		break;
	}

	mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
}
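
/*
 * Each vendor flag accepted by process_vendor_flag() is cleared from the
 * local mask; anything still set when this function finishes is an
 * unsupported request and fails the QP creation with -EINVAL.
 */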
static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
				void *ucmd, struct ib_qp_init_attr *attr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	bool cond;
	int flags;

	if (attr->rwq_ind_tbl)
		flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags;
	else
		flags = ((struct mlx5_ib_create_qp *)ucmd)->flags;

	switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) {
	case MLX5_QP_FLAG_TYPE_DCI:
		qp->type = MLX5_IB_QPT_DCI;
		break;
	case MLX5_QP_FLAG_TYPE_DCT:
		qp->type = MLX5_IB_QPT_DCT;
		break;
	default:
		if (qp->type != IB_QPT_DRIVER)
			break;
		/*
		 * It is IB_QPT_DRIVER and either no subtype or a wrong
		 * subtype was provided.
		 */
		return -EINVAL;
	}

	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);

	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
			    MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE,
			    MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);

	if (qp->type == IB_QPT_RAW_PACKET) {
		cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
		       MLX5_CAP_ETH(mdev, tunnel_stateless_gre) ||
		       MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx);
		process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS,
				    cond, qp);
		process_vendor_flag(dev, &flags,
				    MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true,
				    qp);
		process_vendor_flag(dev, &flags,
				    MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true,
				    qp);
	}

	if (qp->type == IB_QPT_RC)
		process_vendor_flag(dev, &flags,
				    MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE,
				    MLX5_CAP_GEN(mdev, qp_packet_based), qp);

	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp);
	process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp);

	cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
				MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
				MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC);
	if (attr->rwq_ind_tbl && cond) {
		mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n",
			    cond);
		return -EINVAL;
	}

	if (flags)
		mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags);

	return (flags) ? -EINVAL : 0;
}
static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
				bool cond, struct mlx5_ib_qp *qp)
{
	if (!(*flags & flag))
		return;

	if (cond) {
		qp->flags |= flag;
		*flags &= ~flag;
		return;
	}

	if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
		/*
		 * Special case: if the condition wasn't met it isn't an
		 * error, just a different in-kernel flow.
		 */
		*flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
		return;
	}

	mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
}
static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
				struct ib_qp_init_attr *attr)
{
	enum ib_qp_type qp_type = qp->type;
	struct mlx5_core_dev *mdev = dev->mdev;
	int create_flags = attr->create_flags;
	bool cond;

	if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile)
		if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST)
			return -EINVAL;

	if (qp_type == MLX5_IB_QPT_DCT)
		return (create_flags) ? -EINVAL : 0;

	if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl)
		return (create_flags) ? -EINVAL : 0;

	process_create_flag(dev, &create_flags, IB_QP_CREATE_NETIF_QP,
			    mlx5_get_flow_namespace(dev->mdev,
						    MLX5_FLOW_NAMESPACE_BYPASS),
			    qp);
	process_create_flag(dev, &create_flags,
			    IB_QP_CREATE_INTEGRITY_EN,
			    MLX5_CAP_GEN(mdev, sho), qp);
	process_create_flag(dev, &create_flags,
			    IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
			    MLX5_CAP_GEN(mdev, block_lb_mc), qp);
	process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL,
			    MLX5_CAP_GEN(mdev, cd), qp);
	process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND,
			    MLX5_CAP_GEN(mdev, cd), qp);
	process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV,
			    MLX5_CAP_GEN(mdev, cd), qp);

	if (qp_type == IB_QPT_UD) {
		process_create_flag(dev, &create_flags,
				    IB_QP_CREATE_IPOIB_UD_LSO,
				    MLX5_CAP_GEN(mdev, ipoib_basic_offloads),
				    qp);
		cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB;
		process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN,
				    cond, qp);
	}

	if (qp_type == IB_QPT_RAW_PACKET) {
		cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
		       MLX5_CAP_ETH(mdev, scatter_fcs);
		process_create_flag(dev, &create_flags,
				    IB_QP_CREATE_SCATTER_FCS, cond, qp);

		cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
		       MLX5_CAP_ETH(mdev, vlan_cap);
		process_create_flag(dev, &create_flags,
				    IB_QP_CREATE_CVLAN_STRIPPING, cond, qp);
	}

	process_create_flag(dev, &create_flags,
			    IB_QP_CREATE_PCI_WRITE_END_PADDING,
			    MLX5_CAP_GEN(mdev, end_pad), qp);

	process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
			    qp_type != MLX5_IB_QPT_REG_UMR, qp);
	process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
			    true, qp);

	if (create_flags) {
		mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
			    create_flags);
		return -EOPNOTSUPP;
	}

	return 0;
}
static int process_udata_size(struct mlx5_ib_dev *dev,
			      struct mlx5_create_qp_params *params)
{
	size_t ucmd = sizeof(struct mlx5_ib_create_qp);
	struct ib_udata *udata = params->udata;
	size_t outlen = udata->outlen;
	size_t inlen = udata->inlen;

	params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp));
	params->ucmd_size = ucmd;
	if (!params->is_rss_raw) {
		/* User has old rdma-core, which doesn't support ECE */
		size_t min_inlen =
			offsetof(struct mlx5_ib_create_qp, ece_options);

		/*
		 * We will check in check_ucmd_data() that user
		 * cleared everything after inlen.
		 */
		params->inlen = (inlen < min_inlen) ? 0 : min(inlen, ucmd);
		goto out;
	}

	/* RSS RAW QP */
	if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags))
		return -EINVAL;

	if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index))
		return -EINVAL;

	ucmd = sizeof(struct mlx5_ib_create_qp_rss);
	params->ucmd_size = ucmd;
	if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd))
		return -EINVAL;

	params->inlen = min(ucmd, inlen);
out:
	if (!params->inlen)
		mlx5_ib_dbg(dev, "udata is too small\n");

	return (params->inlen) ? 0 : -EINVAL;
}
static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
		     struct mlx5_ib_qp *qp,
		     struct mlx5_create_qp_params *params)
{
	int err;

	if (params->is_rss_raw) {
		err = create_rss_raw_qp_tir(dev, pd, qp, params);
		goto out;
	}

	switch (qp->type) {
	case MLX5_IB_QPT_DCT:
		err = create_dct(dev, pd, qp, params);
		break;
	case IB_QPT_XRC_TGT:
		err = create_xrc_tgt_qp(dev, qp, params);
		break;
	case IB_QPT_GSI:
		err = mlx5_ib_create_gsi(pd, qp, params->attr);
		break;
	default:
		if (params->udata)
			err = create_user_qp(dev, pd, qp, params);
		else
			err = create_kernel_qp(dev, pd, qp, params);
	}

out:
	if (err) {
		mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
		return err;
	}

	if (is_qp0(qp->type))
		qp->ibqp.qp_num = 0;
	else if (is_qp1(qp->type))
		qp->ibqp.qp_num = 1;
	else
		qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;

	mlx5_ib_dbg(dev,
		"QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x, ece 0x%x\n",
		qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
		params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn :
					-1,
		params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn :
					-1,
		params->resp.ece_options);

	return 0;
}
static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
			 struct ib_qp_init_attr *attr)
{
	int ret = 0;

	switch (qp->type) {
	case MLX5_IB_QPT_DCT:
		ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0;
		break;
	case MLX5_IB_QPT_DCI:
		ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ?
			      -EINVAL :
			      0;
		break;
	case IB_QPT_RAW_PACKET:
		ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0;
		break;
	default:
		break;
	}

	if (ret)
		mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type);

	return ret;
}
static int get_qp_uidx(struct mlx5_ib_qp *qp,
		       struct mlx5_create_qp_params *params)
{
	struct mlx5_ib_create_qp *ucmd = params->ucmd;
	struct ib_udata *udata = params->udata;
	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	if (params->is_rss_raw)
		return 0;

	return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &params->uidx);
}

static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
{
	struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);

	if (mqp->state == IB_QPS_RTR) {
		int err;

		err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
			return err;
		}
	}

	kfree(mqp->dct.in);
	kfree(mqp);
	return 0;
}
static int check_ucmd_data(struct mlx5_ib_dev *dev,
			   struct mlx5_create_qp_params *params)
{
	struct ib_udata *udata = params->udata;
	size_t size, last;
	int ret;

	if (params->is_rss_raw)
		/*
		 * These QPs don't have "reserved" field in their
		 * create_qp input struct, so their data is always valid.
		 */
		last = sizeof(struct mlx5_ib_create_qp_rss);
	else
		last = offsetof(struct mlx5_ib_create_qp, reserved);

	if (udata->inlen <= last)
		return 0;

	/*
	 * Userspace provides different create_qp structures based on the
	 * flow, and we need to know whether it cleared the memory after
	 * our struct create_qp ends.
	 */
	size = udata->inlen - last;
	ret = ib_is_udata_cleared(params->udata, last, size);
	if (!ret)
		mlx5_ib_dbg(
			dev,
			"udata is not cleared, inlen = %zu, ucmd = %zu, last = %zu, size = %zu\n",
			udata->inlen, params->ucmd_size, last, size);
	return ret ? 0 : -EINVAL;
}
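
/*
 * Main uverbs/kernel entry point for QP creation: validate the QP type
 * and calling flow, size and copy the user command, process vendor and
 * verbs create flags, and then dispatch to the type-specific create
 * routine via create_qp().
 */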
struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
				struct ib_udata *udata)
{
	struct mlx5_create_qp_params params = {};
	struct mlx5_ib_dev *dev;
	struct mlx5_ib_qp *qp;
	enum ib_qp_type type;
	int err;

	dev = pd ? to_mdev(pd->device) :
		   to_mdev(to_mxrcd(attr->xrcd)->ibxrcd.device);

	err = check_qp_type(dev, attr, &type);
	if (err)
		return ERR_PTR(err);

	err = check_valid_flow(dev, pd, attr, udata);
	if (err)
		return ERR_PTR(err);

	params.udata = udata;
	params.uidx = MLX5_IB_DEFAULT_UIDX;
	params.attr = attr;
	params.is_rss_raw = !!attr->rwq_ind_tbl;

	if (udata) {
		err = process_udata_size(dev, &params);
		if (err)
			return ERR_PTR(err);

		err = check_ucmd_data(dev, &params);
		if (err)
			return ERR_PTR(err);

		params.ucmd = kzalloc(params.ucmd_size, GFP_KERNEL);
		if (!params.ucmd)
			return ERR_PTR(-ENOMEM);

		err = ib_copy_from_udata(params.ucmd, udata, params.inlen);
		if (err)
			goto free_ucmd;
	}

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp) {
		err = -ENOMEM;
		goto free_ucmd;
	}

	mutex_init(&qp->mutex);
	qp->type = type;
	if (udata) {
		err = process_vendor_flags(dev, qp, params.ucmd, attr);
		if (err)
			goto free_qp;

		err = get_qp_uidx(qp, &params);
		if (err)
			goto free_qp;
	}
	err = process_create_flags(dev, qp, attr);
	if (err)
		goto free_qp;

	err = check_qp_attr(dev, qp, attr);
	if (err)
		goto free_qp;

	err = create_qp(dev, pd, qp, &params);
	if (err)
		goto free_qp;

	kfree(params.ucmd);
	params.ucmd = NULL;

	if (udata)
		/*
		 * It is safe to copy response for all user create QP flows,
		 * including MLX5_IB_QPT_DCT, which doesn't need it.
		 * In that case, resp will be filled with zeros.
		 */
		err = ib_copy_to_udata(udata, &params.resp, params.outlen);
	if (err)
		goto destroy_qp;

	return &qp->ibqp;

destroy_qp:
	switch (qp->type) {
	case MLX5_IB_QPT_DCT:
		mlx5_ib_destroy_dct(qp);
		break;
	case IB_QPT_GSI:
		mlx5_ib_destroy_gsi(qp);
		break;
	default:
		/*
		 * The lines below are a temporary solution until QP
		 * allocation is moved under IB/core responsibility.
		 */
		qp->ibqp.send_cq = attr->send_cq;
		qp->ibqp.recv_cq = attr->recv_cq;
		qp->ibqp.pd = pd;
		destroy_qp_common(dev, qp, udata);
	}

	qp = NULL;
free_qp:
	kfree(qp);
free_ucmd:
	kfree(params.ucmd);
	return ERR_PTR(err);
}
int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_qp *mqp = to_mqp(qp);

	if (unlikely(qp->qp_type == IB_QPT_GSI))
		return mlx5_ib_destroy_gsi(mqp);

	if (mqp->type == MLX5_IB_QPT_DCT)
		return mlx5_ib_destroy_dct(mqp);

	destroy_qp_common(dev, mqp, udata);

	kfree(mqp);

	return 0;
}
static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp,
				const struct ib_qp_attr *attr, int attr_mask,
				void *qpc)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	u8 dest_rd_atomic;
	u32 access_flags;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		dest_rd_atomic = attr->max_dest_rd_atomic;
	else
		dest_rd_atomic = qp->trans_qp.resp_depth;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		access_flags = attr->qp_access_flags;
	else
		access_flags = qp->trans_qp.atomic_rd_en;

	if (!dest_rd_atomic)
		access_flags &= IB_ACCESS_REMOTE_WRITE;

	MLX5_SET(qpc, qpc, rre, !!(access_flags & IB_ACCESS_REMOTE_READ));

	if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
		int atomic_mode;

		atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type);
		if (atomic_mode < 0)
			return -EOPNOTSUPP;

		MLX5_SET(qpc, qpc, rae, 1);
		MLX5_SET(qpc, qpc, atomic_mode, atomic_mode);
	}

	MLX5_SET(qpc, qpc, rwe, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	return 0;
}
enum {
	MLX5_PATH_FLAG_FL	= 1 << 0,
	MLX5_PATH_FLAG_FREE_AR	= 1 << 1,
	MLX5_PATH_FLAG_COUNTER	= 1 << 2,
};

static int ib_to_mlx5_rate_map(u8 rate)
{
	switch (rate) {
	case IB_RATE_PORT_CURRENT:
		return 0;
	case IB_RATE_56_GBPS:
		return 1;
	case IB_RATE_25_GBPS:
		return 2;
	case IB_RATE_100_GBPS:
		return 3;
	case IB_RATE_200_GBPS:
		return 4;
	case IB_RATE_50_GBPS:
		return 5;
	default:
		return rate + MLX5_STAT_RATE_OFFSET;
	}

	return 0;
}

static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
{
	u32 stat_rate_support;

	if (rate == IB_RATE_PORT_CURRENT)
		return 0;

	if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
		return -EINVAL;

	stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
	while (rate != IB_RATE_PORT_CURRENT &&
	       !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
		--rate;

	return ib_to_mlx5_rate_map(rate);
}
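
/*
 * For raw packet QPs the SL is not carried in the QPC; it is programmed
 * into the TIS priority via a MODIFY_TIS command instead.
 */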
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
				      struct mlx5_ib_sq *sq, u8 sl,
				      struct ib_pd *pd)
{
	void *in;
	void *tisc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
	MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);

	tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));

	err = mlx5_core_modify_tis(dev, sq->tisn, in);

	kvfree(in);

	return err;
}

static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
					 struct mlx5_ib_sq *sq, u8 tx_affinity,
					 struct ib_pd *pd)
{
	void *in;
	void *tisc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
	MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);

	tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);

	err = mlx5_core_modify_tis(dev, sq->tisn, in);

	kvfree(in);

	return err;
}
static void mlx5_set_path_udp_sport(void *path, const struct rdma_ah_attr *ah,
				    u32 lqpn, u32 rqpn)
{
	u32 fl = ah->grh.flow_label;

	if (!fl)
		fl = rdma_calc_flow_label(lqpn, rqpn);

	MLX5_SET(ads, path, udp_sport, rdma_flow_label_to_udp_sport(fl));
}

static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
			 const struct rdma_ah_attr *ah, void *path, u8 port,
			 int attr_mask, u32 path_flags,
			 const struct ib_qp_attr *attr, bool alt)
{
	const struct ib_global_route *grh = rdma_ah_read_grh(ah);
	int err;
	enum ib_gid_type gid_type;
	u8 ah_flags = rdma_ah_get_ah_flags(ah);
	u8 sl = rdma_ah_get_sl(ah);

	if (attr_mask & IB_QP_PKEY_INDEX)
		MLX5_SET(ads, path, pkey_index,
			 alt ? attr->alt_pkey_index : attr->pkey_index);

	if (ah_flags & IB_AH_GRH) {
		if (grh->sgid_index >=
		    dev->mdev->port_caps[port - 1].gid_table_len) {
			pr_err("sgid_index (%u) too large. max is %d\n",
			       grh->sgid_index,
			       dev->mdev->port_caps[port - 1].gid_table_len);
			return -EINVAL;
		}
	}

	if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
		if (!(ah_flags & IB_AH_GRH))
			return -EINVAL;

		ether_addr_copy(MLX5_ADDR_OF(ads, path, rmac_47_32),
				ah->roce.dmac);
		if ((qp->ibqp.qp_type == IB_QPT_RC ||
		     qp->ibqp.qp_type == IB_QPT_UC ||
		     qp->ibqp.qp_type == IB_QPT_XRC_INI ||
		     qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
		    (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
		    (attr_mask & IB_QP_DEST_QPN))
			mlx5_set_path_udp_sport(path, ah,
						qp->ibqp.qp_num,
						attr->dest_qp_num);
		MLX5_SET(ads, path, eth_prio, sl & 0x7);
		gid_type = ah->grh.sgid_attr->gid_type;
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			MLX5_SET(ads, path, dscp, grh->traffic_class >> 2);
	} else {
		MLX5_SET(ads, path, fl, !!(path_flags & MLX5_PATH_FLAG_FL));
		MLX5_SET(ads, path, free_ar,
			 !!(path_flags & MLX5_PATH_FLAG_FREE_AR));
		MLX5_SET(ads, path, rlid, rdma_ah_get_dlid(ah));
		MLX5_SET(ads, path, mlid, rdma_ah_get_path_bits(ah));
		MLX5_SET(ads, path, grh, !!(ah_flags & IB_AH_GRH));
		MLX5_SET(ads, path, sl, sl);
	}

	if (ah_flags & IB_AH_GRH) {
		MLX5_SET(ads, path, src_addr_index, grh->sgid_index);
		MLX5_SET(ads, path, hop_limit, grh->hop_limit);
		MLX5_SET(ads, path, tclass, grh->traffic_class);
		MLX5_SET(ads, path, flow_label, grh->flow_label);
		memcpy(MLX5_ADDR_OF(ads, path, rgid_rip), grh->dgid.raw,
		       sizeof(grh->dgid.raw));
	}

	err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
	if (err < 0)
		return err;
	MLX5_SET(ads, path, stat_rate, err);
	MLX5_SET(ads, path, vhca_port_num, port);

	if (attr_mask & IB_QP_TIMEOUT)
		MLX5_SET(ads, path, ack_timeout,
			 alt ? attr->alt_timeout : attr->timeout);

	if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
		return modify_raw_packet_eth_prio(dev->mdev,
						  &qp->raw_packet_qp.sq,
						  sl & 0xf, qp->ibqp.pd);

	return 0;
}
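
/*
 * opt_mask[] lists, per (current state, new state, service type), which
 * optional-parameter bits may accompany a modify command; the mask built
 * from the caller's attr_mask is ANDed with this table before it is sent
 * to firmware.
 */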
static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
	[MLX5_QP_STATE_INIT] = {
		[MLX5_QP_STATE_INIT] = {
			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_PRI_PORT	|
					  MLX5_QP_OPTPAR_LAG_TX_AFF,
			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_PRI_PORT	|
					  MLX5_QP_OPTPAR_LAG_TX_AFF,
			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_Q_KEY		|
					  MLX5_QP_OPTPAR_PRI_PORT,
			[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_PRI_PORT	|
					  MLX5_QP_OPTPAR_LAG_TX_AFF,
		},
		[MLX5_QP_STATE_RTR] = {
			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
					  MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_LAG_TX_AFF,
			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_LAG_TX_AFF,
			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_Q_KEY,
			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX	|
					   MLX5_QP_OPTPAR_Q_KEY,
			[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
					  MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PKEY_INDEX	|
					  MLX5_QP_OPTPAR_LAG_TX_AFF,
		},
	},
	[MLX5_QP_STATE_RTR] = {
		[MLX5_QP_STATE_RTS] = {
			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
					  MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PM_STATE	|
					  MLX5_QP_OPTPAR_RNR_TIMEOUT,
			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PM_STATE,
			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
			[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
					  MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PM_STATE	|
					  MLX5_QP_OPTPAR_RNR_TIMEOUT,
		},
	},
	[MLX5_QP_STATE_RTS] = {
		[MLX5_QP_STATE_RTS] = {
			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_RNR_TIMEOUT	|
					  MLX5_QP_OPTPAR_PM_STATE	|
					  MLX5_QP_OPTPAR_ALT_ADDR_PATH,
			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_PM_STATE	|
					  MLX5_QP_OPTPAR_ALT_ADDR_PATH,
			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY		|
					  MLX5_QP_OPTPAR_SRQN		|
					  MLX5_QP_OPTPAR_CQN_RCV,
			[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE		|
					  MLX5_QP_OPTPAR_RAE		|
					  MLX5_QP_OPTPAR_RWE		|
					  MLX5_QP_OPTPAR_RNR_TIMEOUT	|
					  MLX5_QP_OPTPAR_PM_STATE	|
					  MLX5_QP_OPTPAR_ALT_ADDR_PATH,
		},
	},
	[MLX5_QP_STATE_SQER] = {
		[MLX5_QP_STATE_RTS] = {
			[MLX5_QP_ST_UD]	 = MLX5_QP_OPTPAR_Q_KEY,
			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
			[MLX5_QP_ST_UC]	 = MLX5_QP_OPTPAR_RWE,
			[MLX5_QP_ST_RC]	 = MLX5_QP_OPTPAR_RNR_TIMEOUT	|
					   MLX5_QP_OPTPAR_RWE		|
					   MLX5_QP_OPTPAR_RAE		|
					   MLX5_QP_OPTPAR_RRE,
			[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RNR_TIMEOUT	|
					   MLX5_QP_OPTPAR_RWE		|
					   MLX5_QP_OPTPAR_RAE		|
					   MLX5_QP_OPTPAR_RRE,
		},
	},
};
static int ib_nr_to_mlx5_nr(int ib_mask)
{
	switch (ib_mask) {
	case IB_QP_STATE:
		return 0;
	case IB_QP_CUR_STATE:
		return 0;
	case IB_QP_EN_SQD_ASYNC_NOTIFY:
		return 0;
	case IB_QP_ACCESS_FLAGS:
		return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
			MLX5_QP_OPTPAR_RAE;
	case IB_QP_PKEY_INDEX:
		return MLX5_QP_OPTPAR_PKEY_INDEX;
	case IB_QP_PORT:
		return MLX5_QP_OPTPAR_PRI_PORT;
	case IB_QP_QKEY:
		return MLX5_QP_OPTPAR_Q_KEY;
	case IB_QP_AV:
		return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
			MLX5_QP_OPTPAR_PRI_PORT;
	case IB_QP_PATH_MTU:
		return 0;
	case IB_QP_TIMEOUT:
		return MLX5_QP_OPTPAR_ACK_TIMEOUT;
	case IB_QP_RETRY_CNT:
		return MLX5_QP_OPTPAR_RETRY_COUNT;
	case IB_QP_RNR_RETRY:
		return MLX5_QP_OPTPAR_RNR_RETRY;
	case IB_QP_RQ_PSN:
		return 0;
	case IB_QP_MAX_QP_RD_ATOMIC:
		return MLX5_QP_OPTPAR_SRA_MAX;
	case IB_QP_ALT_PATH:
		return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
	case IB_QP_MIN_RNR_TIMER:
		return MLX5_QP_OPTPAR_RNR_TIMEOUT;
	case IB_QP_SQ_PSN:
		return 0;
	case IB_QP_MAX_DEST_RD_ATOMIC:
		return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
			MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
	case IB_QP_PATH_MIG_STATE:
		return MLX5_QP_OPTPAR_PM_STATE;
	case IB_QP_CAP:
		return 0;
	case IB_QP_DEST_QPN:
		return 0;
	}
	return 0;
}

static int ib_mask_to_mlx5_opt(int ib_mask)
{
	int result = 0;
	int i;

	for (i = 0; i < 8 * sizeof(int); i++) {
		if ((1 << i) & ib_mask)
			result |= ib_nr_to_mlx5_nr(1 << i);
	}

	return result;
}
static int modify_raw_packet_qp_rq(
	struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
	const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
	void *in;
	void *rqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rq_in, in, rq_state, rq->state);
	MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
	MLX5_SET(rqc, rqc, state, new_state);

	if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
			MLX5_SET64(modify_rq_in, in, modify_bitmask,
				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
			MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
		} else
			dev_info_once(
				&dev->ib_dev.dev,
				"RAW PACKET QP counters are not supported on current FW\n");
	}

	err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in);
	if (err)
		goto out;

	rq->state = new_state;

out:
	kvfree(in);
	return err;
}
static int modify_raw_packet_qp_sq(
	struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
	const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
	struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
	struct mlx5_rate_limit old_rl = ibqp->rl;
	struct mlx5_rate_limit new_rl = old_rl;
	bool new_rate_added = false;
	u16 rl_index = 0;
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
	MLX5_SET(modify_sq_in, in, sq_state, sq->state);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
	MLX5_SET(sqc, sqc, state, new_state);

	if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) {
		if (new_state != MLX5_SQC_STATE_RDY)
			pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
				__func__);
		else
			new_rl = raw_qp_param->rl;
	}

	if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
		if (new_rl.rate) {
			err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
			if (err) {
				pr_err("Failed configuring rate limit(err %d): \
				       rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
				       err, new_rl.rate, new_rl.max_burst_sz,
				       new_rl.typical_pkt_sz);

				goto out;
			}
			new_rate_added = true;
		}

		MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
		/* index 0 means no limit */
		MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
	}

	err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in);
	if (err) {
		/* Remove new rate from table if failed */
		if (new_rate_added)
			mlx5_rl_remove_rate(dev, &new_rl);
		goto out;
	}

	/* Only remove the old rate after new rate was set */
	if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
	    (new_state != MLX5_SQC_STATE_RDY)) {
		mlx5_rl_remove_rate(dev, &old_rl);
		if (new_state != MLX5_SQC_STATE_RDY)
			memset(&new_rl, 0, sizeof(new_rl));
	}

	ibqp->rl = new_rl;
	sq->state = new_state;

out:
	kvfree(in);
	return err;
}
static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
				const struct mlx5_modify_raw_qp_param *raw_qp_param,
				u8 tx_affinity)
{
	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
	int modify_rq = !!qp->rq.wqe_cnt;
	int modify_sq = !!qp->sq.wqe_cnt;
	int rq_state;
	int sq_state;
	int err;

	switch (raw_qp_param->operation) {
	case MLX5_CMD_OP_RST2INIT_QP:
		rq_state = MLX5_RQC_STATE_RDY;
		sq_state = MLX5_SQC_STATE_RST;
		break;
	case MLX5_CMD_OP_2ERR_QP:
		rq_state = MLX5_RQC_STATE_ERR;
		sq_state = MLX5_SQC_STATE_ERR;
		break;
	case MLX5_CMD_OP_2RST_QP:
		rq_state = MLX5_RQC_STATE_RST;
		sq_state = MLX5_SQC_STATE_RST;
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
	case MLX5_CMD_OP_RTS2RTS_QP:
		if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
			return -EINVAL;

		modify_rq = 0;
		sq_state = MLX5_SQC_STATE_RDY;
		break;
	case MLX5_CMD_OP_INIT2INIT_QP:
	case MLX5_CMD_OP_INIT2RTR_QP:
		if (raw_qp_param->set_mask)
			return -EINVAL;
		else
			return 0;
	default:
		WARN_ON(1);
		return -EINVAL;
	}

	if (modify_rq) {
		err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
					      qp->ibqp.pd);
		if (err)
			return err;
	}

	if (modify_sq) {
		struct mlx5_flow_handle *flow_rule;

		if (tx_affinity) {
			err = modify_raw_packet_tx_affinity(dev->mdev, sq,
							    tx_affinity,
							    qp->ibqp.pd);
			if (err)
				return err;
		}

		flow_rule = create_flow_rule_vport_sq(dev, sq,
						      raw_qp_param->port);
		if (IS_ERR(flow_rule))
			return PTR_ERR(flow_rule);

		err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
					      raw_qp_param, qp->ibqp.pd);
		if (err) {
			if (flow_rule)
				mlx5_del_flow_rules(flow_rule);
			return err;
		}

		if (flow_rule) {
			destroy_flow_rule_vport_sq(sq);
			sq->flow_rule = flow_rule;
		}

		return err;
	}

	return 0;
}
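
/*
 * When LAG is active, spread QPs across the physical ports by
 * round-robining a per-ucontext (or per-port for kernel users) counter;
 * the result is a 1-based port index used as the TIS/QPC tx affinity.
 */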
static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
				       struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);
	u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
	atomic_t *tx_port_affinity;

	if (ucontext)
		tx_port_affinity = &ucontext->tx_port_affinity;
	else
		tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;

	return (unsigned int)atomic_add_return(1, tx_port_affinity) %
	       MLX5_MAX_PORTS + 1;
}

static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
{
	if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
	    (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
	    (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
	    (qp->type == MLX5_IB_QPT_DCI))
		return true;
	return false;
}

static unsigned int get_tx_affinity(struct ib_qp *qp,
				    const struct ib_qp_attr *attr,
				    int attr_mask, u8 init,
				    struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_qp_base *qp_base;
	unsigned int tx_affinity;

	if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
	      qp_supports_affinity(mqp)))
		return 0;

	if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
		tx_affinity = mqp->gsi_lag_port;
	else if (init)
		tx_affinity = get_tx_affinity_rr(dev, udata);
	else if ((attr_mask & IB_QP_AV) && attr->xmit_slave)
		tx_affinity =
			mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave);
	else
		return 0;

	qp_base = &mqp->trans_qp.base;
	if (ucontext)
		mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
			    tx_affinity, qp_base->mqp.qpn, ucontext);
	else
		mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
			    tx_affinity, qp_base->mqp.qpn);
	return tx_affinity;
}
static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
				    struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	u32 in[MLX5_ST_SZ_DW(rts2rts_qp_in)] = {};
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	struct mlx5_ib_qp_base *base;
	u32 set_id;
	void *qpc;

	if (counter)
		set_id = counter->id;
	else
		set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);

	base = &mqp->trans_qp.base;
	MLX5_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP);
	MLX5_SET(rts2rts_qp_in, in, qpn, base->mqp.qpn);
	MLX5_SET(rts2rts_qp_in, in, uid, base->mqp.uid);
	MLX5_SET(rts2rts_qp_in, in, opt_param_mask,
		 MLX5_QP_OPTPAR_COUNTER_SET_ID);

	qpc = MLX5_ADDR_OF(rts2rts_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, counter_set_id, set_id);
	return mlx5_cmd_exec_in(dev->mdev, rts2rts_qp, in);
}
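
/*
 * Core modify handler: optab[] maps the (current, new) verbs state pair
 * to the firmware transition command, the QPC mailbox is filled from
 * attr/attr_mask, and raw packet QPs are redirected to the RQ/SQ/TIS
 * modify helpers instead of the QP modify command.
 */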
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
			       const struct ib_qp_attr *attr, int attr_mask,
			       enum ib_qp_state cur_state,
			       enum ib_qp_state new_state,
			       const struct mlx5_ib_modify_qp *ucmd,
			       struct mlx5_ib_modify_qp_resp *resp,
			       struct ib_udata *udata)
{
	static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
		[MLX5_QP_STATE_RST] = {
			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
			[MLX5_QP_STATE_INIT]	= MLX5_CMD_OP_RST2INIT_QP,
		},
		[MLX5_QP_STATE_INIT]  = {
			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
			[MLX5_QP_STATE_INIT]	= MLX5_CMD_OP_INIT2INIT_QP,
			[MLX5_QP_STATE_RTR]	= MLX5_CMD_OP_INIT2RTR_QP,
		},
		[MLX5_QP_STATE_RTR]   = {
			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_RTR2RTS_QP,
		},
		[MLX5_QP_STATE_RTS]   = {
			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_RTS2RTS_QP,
		},
		[MLX5_QP_STATE_SQD] = {
			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
		},
		[MLX5_QP_STATE_SQER] = {
			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_SQERR2RTS_QP,
		},
		[MLX5_QP_STATE_ERR] = {
			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
		}
	};

	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
	struct mlx5_ib_cq *send_cq, *recv_cq;
	struct mlx5_ib_pd *pd;
	enum mlx5_qp_state mlx5_cur, mlx5_new;
	void *qpc, *pri_path, *alt_path;
	enum mlx5_qp_optpar optpar = 0;
	u32 set_id = 0;
	int mlx5_st;
	int err;
	u16 op;
	u8 tx_affinity = 0;

	mlx5_st = to_mlx5_st(qp->type);
	if (mlx5_st < 0)
		return -EINVAL;

	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
	if (!qpc)
		return -ENOMEM;

	pd = to_mpd(qp->ibqp.pd);
	MLX5_SET(qpc, qpc, st, mlx5_st);

	if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
		MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	} else {
		switch (attr->path_mig_state) {
		case IB_MIG_MIGRATED:
			MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
			break;
		case IB_MIG_REARM:
			MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_REARM);
			break;
		case IB_MIG_ARMED:
			MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_ARMED);
			break;
		}
	}

	tx_affinity = get_tx_affinity(ibqp, attr, attr_mask,
				      cur_state == IB_QPS_RESET &&
				      new_state == IB_QPS_INIT, udata);

	MLX5_SET(qpc, qpc, lag_tx_port_affinity, tx_affinity);
	if (tx_affinity && new_state == IB_QPS_RTR &&
	    MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity))
		optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF;

	if (is_sqp(ibqp->qp_type)) {
		MLX5_SET(qpc, qpc, mtu, IB_MTU_256);
		MLX5_SET(qpc, qpc, log_msg_max, 8);
	} else if ((ibqp->qp_type == IB_QPT_UD &&
		    !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) ||
		   ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
		MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
		MLX5_SET(qpc, qpc, log_msg_max, 12);
	} else if (attr_mask & IB_QP_PATH_MTU) {
		if (attr->path_mtu < IB_MTU_256 ||
		    attr->path_mtu > IB_MTU_4096) {
			mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
			err = -EINVAL;
			goto out;
		}
		MLX5_SET(qpc, qpc, mtu, attr->path_mtu);
		MLX5_SET(qpc, qpc, log_msg_max,
			 MLX5_CAP_GEN(dev->mdev, log_max_msg));
	}

	if (attr_mask & IB_QP_DEST_QPN)
		MLX5_SET(qpc, qpc, remote_qpn, attr->dest_qp_num);

	pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
	alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);

	if (attr_mask & IB_QP_PKEY_INDEX)
		MLX5_SET(ads, pri_path, pkey_index, attr->pkey_index);

	/* todo implement counter_index functionality */

	if (is_sqp(ibqp->qp_type))
		MLX5_SET(ads, pri_path, vhca_port_num, qp->port);

	if (attr_mask & IB_QP_PORT)
		MLX5_SET(ads, pri_path, vhca_port_num, attr->port_num);

	if (attr_mask & IB_QP_AV) {
		err = mlx5_set_path(dev, qp, &attr->ah_attr, pri_path,
				    attr_mask & IB_QP_PORT ? attr->port_num :
							     qp->port,
				    attr_mask, 0, attr, false);
		if (err)
			goto out;
	}

	if (attr_mask & IB_QP_TIMEOUT)
		MLX5_SET(ads, pri_path, ack_timeout, attr->timeout);

	if (attr_mask & IB_QP_ALT_PATH) {
		err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, alt_path,
				    attr->alt_port_num,
				    attr_mask | IB_QP_PKEY_INDEX |
					    IB_QP_TIMEOUT,
				    0, attr, true);
		if (err)
			goto out;
	}

	get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
		&send_cq, &recv_cq);

	MLX5_SET(qpc, qpc, pd, pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
	if (send_cq)
		MLX5_SET(qpc, qpc, cqn_snd, send_cq->mcq.cqn);
	if (recv_cq)
		MLX5_SET(qpc, qpc, cqn_rcv, recv_cq->mcq.cqn);

	MLX5_SET(qpc, qpc, log_ack_req_freq, MLX5_IB_ACK_REQ_FREQ);

	if (attr_mask & IB_QP_RNR_RETRY)
		MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);

	if (attr_mask & IB_QP_RETRY_CNT)
		MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic)
		MLX5_SET(qpc, qpc, log_sra_max, ilog2(attr->max_rd_atomic));

	if (attr_mask & IB_QP_SQ_PSN)
		MLX5_SET(qpc, qpc, next_send_psn, attr->sq_psn);

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic)
		MLX5_SET(qpc, qpc, log_rra_max,
			 ilog2(attr->max_dest_rd_atomic));

	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
		err = set_qpc_atomic_flags(qp, attr, attr_mask, qpc);
		if (err)
			goto out;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		MLX5_SET(qpc, qpc, min_rnr_nak, attr->min_rnr_timer);

	if (attr_mask & IB_QP_RQ_PSN)
		MLX5_SET(qpc, qpc, next_rcv_psn, attr->rq_psn);

	if (attr_mask & IB_QP_QKEY)
		MLX5_SET(qpc, qpc, q_key, attr->qkey);

	if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
		u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
			       qp->port) - 1;

		/* Underlay port should be used - index 0 function per port */
		if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
			port_num = 0;

		if (ibqp->counter)
			set_id = ibqp->counter->id;
		else
			set_id = mlx5_ib_get_counters_id(dev, port_num);
		MLX5_SET(qpc, qpc, counter_set_id, set_id);
	}

	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		MLX5_SET(qpc, qpc, rlky, 1);

	if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
		MLX5_SET(qpc, qpc, deth_sqpn, 1);

	mlx5_cur = to_mlx5_state(cur_state);
	mlx5_new = to_mlx5_state(new_state);

	if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
	    !optab[mlx5_cur][mlx5_new]) {
		err = -EINVAL;
		goto out;
	}

	op = optab[mlx5_cur][mlx5_new];
	optpar |= ib_mask_to_mlx5_opt(attr_mask);
	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];

	if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
	    qp->flags & IB_QP_CREATE_SOURCE_QPN) {
		struct mlx5_modify_raw_qp_param raw_qp_param = {};

		raw_qp_param.operation = op;
		if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
			raw_qp_param.rq_q_ctr_id = set_id;
			raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
		}

		if (attr_mask & IB_QP_PORT)
			raw_qp_param.port = attr->port_num;

		if (attr_mask & IB_QP_RATE_LIMIT) {
			raw_qp_param.rl.rate = attr->rate_limit;

			if (ucmd->burst_info.max_burst_sz) {
				if (attr->rate_limit &&
				    MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) {
					raw_qp_param.rl.max_burst_sz =
						ucmd->burst_info.max_burst_sz;
				} else {
					err = -EINVAL;
					goto out;
				}
			}

			if (ucmd->burst_info.typical_pkt_sz) {
				if (attr->rate_limit &&
				    MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) {
					raw_qp_param.rl.typical_pkt_sz =
						ucmd->burst_info.typical_pkt_sz;
				} else {
					err = -EINVAL;
					goto out;
				}
			}

			raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
		}

		err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
	} else {
		if (udata) {
			/* For the kernel flows, the resp will stay zero */
			resp->ece_options =
				MLX5_CAP_GEN(dev->mdev, ece_support) ?
					ucmd->ece_options : 0;
			resp->response_length = sizeof(*resp);
		}
		err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp,
					  &resp->ece_options);
	}

	if (err)
		goto out;

	qp->state = new_state;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
	if (attr_mask & IB_QP_PORT)
		qp->port = attr->port_num;
	if (attr_mask & IB_QP_ALT_PATH)
		qp->trans_qp.alt_port = attr->alt_port_num;

	/*
	 * If we moved a kernel QP to RESET, clean up all old CQ
	 * entries and reinitialize the QP.
	 */
	if (new_state == IB_QPS_RESET &&
	    !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) {
		mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
				 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
		if (send_cq != recv_cq)
			mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);

		qp->rq.head = 0;
		qp->rq.tail = 0;
		qp->sq.head = 0;
		qp->sq.tail = 0;
		qp->sq.cur_post = 0;
		if (qp->sq.wqe_cnt)
			qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
		qp->sq.last_poll = 0;
		qp->db.db[MLX5_RCV_DBR] = 0;
		qp->db.db[MLX5_SND_DBR] = 0;
	}

	if ((new_state == IB_QPS_RTS) && qp->counter_pending) {
		err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter);
		if (!err)
			qp->counter_pending = 0;
	}

out:
	kfree(qpc);
	return err;
}
static inline bool is_valid_mask(int mask, int req, int opt)
{
	if ((mask & req) != req)
		return false;

	if (mask & ~(req | opt))
		return false;

	return true;
}

/* check valid transition for driver QP types
 * for now the only QP type that this function supports is DCI
 */
static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new_state,
				enum ib_qp_attr_mask attr_mask)
{
	int req = IB_QP_STATE;
	int opt = 0;

	if (new_state == IB_QPS_RESET) {
		return is_valid_mask(attr_mask, req, opt);
	} else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
		req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
		return is_valid_mask(attr_mask, req, opt);
	} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
		opt = IB_QP_PKEY_INDEX | IB_QP_PORT;
		return is_valid_mask(attr_mask, req, opt);
	} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
		req |= IB_QP_PATH_MTU;
		opt = IB_QP_PKEY_INDEX | IB_QP_AV;
		return is_valid_mask(attr_mask, req, opt);
	} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
		req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
		       IB_QP_MAX_QP_RD_ATOMIC | IB_QP_SQ_PSN;
		opt = IB_QP_MIN_RNR_TIMER;
		return is_valid_mask(attr_mask, req, opt);
	} else if (cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) {
		opt = IB_QP_MIN_RNR_TIMER;
		return is_valid_mask(attr_mask, req, opt);
	} else if (cur_state != IB_QPS_RESET && new_state == IB_QPS_ERR) {
		return is_valid_mask(attr_mask, req, opt);
	}
	return false;
}
/* mlx5_ib_modify_dct: modify a DCT QP
 * valid transitions are:
 * RESET to INIT: must set access_flags, pkey_index and port
 * INIT  to RTR : must set min_rnr_timer, tclass, flow_label,
 *			   mtu, gid_index and hop_limit
 * Other transitions and attributes are illegal
 */
static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			      int attr_mask, struct mlx5_ib_modify_qp *ucmd,
			      struct ib_udata *udata)
{
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	enum ib_qp_state cur_state, new_state;
	int required = IB_QP_STATE;
	void *dctc;
	int err;

	if (!(attr_mask & IB_QP_STATE))
		return -EINVAL;

	cur_state = qp->state;
	new_state = attr->qp_state;

	dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
	if (MLX5_CAP_GEN(dev->mdev, ece_support) && ucmd->ece_options)
		/*
		 * DCT doesn't initialize QP till modify command is executed,
		 * so we need to overwrite previously set ECE field if user
		 * provided any value except zero, which means not set/not
		 * valid.
		 */
		MLX5_SET(dctc, dctc, ece, ucmd->ece_options);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
		u16 set_id;

		required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
		if (!is_valid_mask(attr_mask, required, 0))
			return -EINVAL;

		if (attr->port_num == 0 ||
		    attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) {
			mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
				    attr->port_num, dev->num_ports);
			return -EINVAL;
		}
		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
			MLX5_SET(dctc, dctc, rre, 1);
		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
			MLX5_SET(dctc, dctc, rwe, 1);
		if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
			int atomic_mode;

			atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
			if (atomic_mode < 0)
				return -EOPNOTSUPP;

			MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
			MLX5_SET(dctc, dctc, rae, 1);
		}
		MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
		if (mlx5_lag_is_active(dev->mdev))
			MLX5_SET(dctc, dctc, port,
				 get_tx_affinity_rr(dev, udata));
		else
			MLX5_SET(dctc, dctc, port, attr->port_num);

		set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
		MLX5_SET(dctc, dctc, counter_set_id, set_id);
	} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
		struct mlx5_ib_modify_qp_resp resp = {};
		u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
		u32 min_resp_len = offsetofend(typeof(resp), dctn);

		if (udata->outlen < min_resp_len)
			return -EINVAL;
		/*
		 * If we don't have enough space for the ECE options,
		 * simply indicate it with resp.response_length.
		 */
		resp.response_length = (udata->outlen < sizeof(resp)) ?
					       min_resp_len :
					       sizeof(resp);

		required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU;
		if (!is_valid_mask(attr_mask, required, 0))
			return -EINVAL;
		MLX5_SET(dctc, dctc, min_rnr_nak, attr->min_rnr_timer);
		MLX5_SET(dctc, dctc, tclass, attr->ah_attr.grh.traffic_class);
		MLX5_SET(dctc, dctc, flow_label, attr->ah_attr.grh.flow_label);
		MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
		MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
		MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);

		err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
					   MLX5_ST_SZ_BYTES(create_dct_in), out,
					   sizeof(out));
		if (err)
			return err;
		resp.dctn = qp->dct.mdct.mqp.qpn;
		if (MLX5_CAP_GEN(dev->mdev, ece_support))
			resp.ece_options = MLX5_GET(create_dct_out, out, ece);
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err) {
			mlx5_core_destroy_dct(dev, &qp->dct.mdct);
			return err;
		}
	} else {
		mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
		return -EINVAL;
	}

	qp->state = new_state;
	return 0;
}
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		      int attr_mask, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_ib_modify_qp_resp resp = {};
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_ib_modify_qp ucmd = {};
	enum ib_qp_type qp_type;
	enum ib_qp_state cur_state, new_state;
	int err = -EINVAL;
	int port;

	if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
		return -EOPNOTSUPP;

	if (ibqp->rwq_ind_tbl)
		return -ENOSYS;

	if (udata && udata->inlen) {
		if (udata->inlen < offsetofend(typeof(ucmd), ece_options))
			return -EINVAL;

		if (udata->inlen > sizeof(ucmd) &&
		    !ib_is_udata_cleared(udata, sizeof(ucmd),
					 udata->inlen - sizeof(ucmd)))
			return -EOPNOTSUPP;

		if (ib_copy_from_udata(&ucmd, udata,
				       min(udata->inlen, sizeof(ucmd))))
			return -EFAULT;

		if (ucmd.comp_mask ||
		    memchr_inv(&ucmd.burst_info.reserved, 0,
			       sizeof(ucmd.burst_info.reserved)))
			return -EOPNOTSUPP;
	}

	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
		return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);

	qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI :
								    qp->type;

	if (qp_type == MLX5_IB_QPT_DCT)
		return mlx5_ib_modify_dct(ibqp, attr, attr_mask, &ucmd, udata);

	mutex_lock(&qp->mutex);

	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!(cur_state == new_state && cur_state == IB_QPS_RESET))
		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;

	if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
		if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
			mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
				    attr_mask);
			goto out;
		}
	} else if (qp_type != MLX5_IB_QPT_REG_UMR &&
		   qp_type != MLX5_IB_QPT_DCI &&
		   !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
				       attr_mask)) {
		mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
			    cur_state, new_state, ibqp->qp_type, attr_mask);
		goto out;
	} else if (qp_type == MLX5_IB_QPT_DCI &&
		   !modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) {
		mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
			    cur_state, new_state, qp_type, attr_mask);
		goto out;
	}

	if ((attr_mask & IB_QP_PORT) &&
	    (attr->port_num == 0 ||
	     attr->port_num > dev->num_ports)) {
		mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
			    attr->port_num, dev->num_ports);
		goto out;
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
		if (attr->pkey_index >=
		    dev->mdev->port_caps[port - 1].pkey_table_len) {
			mlx5_ib_dbg(dev, "invalid pkey index %d\n",
				    attr->pkey_index);
			goto out;
		}
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
	    attr->max_rd_atomic >
	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
		mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
			    attr->max_rd_atomic);
		goto out;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
	    attr->max_dest_rd_atomic >
	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
		mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
			    attr->max_dest_rd_atomic);
		goto out;
	}

	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
		err = 0;
		goto out;
	}

	err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
				  new_state, &ucmd, &resp, udata);

	/* resp.response_length is set in ECE supported flows only */
	if (!err && resp.response_length &&
	    udata->outlen >= resp.response_length)
		/* Return -EFAULT to the user and expect him to destroy QP. */
		err = ib_copy_to_udata(udata, &resp, resp.response_length);

out:
	mutex_unlock(&qp->mutex);
	return err;
}
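/*
 * Illustrative sketch (not part of the driver): this entry point is normally
 * reached through ib_modify_qp() with the standard attribute masks, e.g.
 * taking a freshly created RC QP from RESET to INIT (values are
 * placeholders):
 *
 *	struct ib_qp_attr attr = {
 *		.qp_state	 = IB_QPS_INIT,
 *		.pkey_index	 = 0,
 *		.port_num	 = 1,
 *		.qp_access_flags = IB_ACCESS_REMOTE_READ |
 *				   IB_ACCESS_REMOTE_WRITE,
 *	};
 *
 *	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
 *				      IB_QP_PORT | IB_QP_ACCESS_FLAGS);
 */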
static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
{
	switch (mlx5_state) {
	case MLX5_QP_STATE_RST:      return IB_QPS_RESET;
	case MLX5_QP_STATE_INIT:     return IB_QPS_INIT;
	case MLX5_QP_STATE_RTR:      return IB_QPS_RTR;
	case MLX5_QP_STATE_RTS:      return IB_QPS_RTS;
	case MLX5_QP_STATE_SQ_DRAINING:
	case MLX5_QP_STATE_SQD:      return IB_QPS_SQD;
	case MLX5_QP_STATE_SQER:     return IB_QPS_SQE;
	case MLX5_QP_STATE_ERR:      return IB_QPS_ERR;
	default:		     return -1;
	}
}
static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
{
	switch (mlx5_mig_state) {
	case MLX5_QP_PM_ARMED:		return IB_MIG_ARMED;
	case MLX5_QP_PM_REARM:		return IB_MIG_REARM;
	case MLX5_QP_PM_MIGRATED:	return IB_MIG_MIGRATED;
	default:			return -1;
	}
}
static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
			    struct rdma_ah_attr *ah_attr, void *path)
{
	int port = MLX5_GET(ads, path, vhca_port_num);
	int static_rate;

	memset(ah_attr, 0, sizeof(*ah_attr));

	if (!port || port > ibdev->num_ports)
		return;

	ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port);

	rdma_ah_set_port_num(ah_attr, port);
	rdma_ah_set_sl(ah_attr, MLX5_GET(ads, path, sl));

	rdma_ah_set_dlid(ah_attr, MLX5_GET(ads, path, rlid));
	rdma_ah_set_path_bits(ah_attr, MLX5_GET(ads, path, mlid));

	static_rate = MLX5_GET(ads, path, stat_rate);
	rdma_ah_set_static_rate(ah_attr, static_rate ? static_rate - 5 : 0);
	if (MLX5_GET(ads, path, grh) ||
	    ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
		rdma_ah_set_grh(ah_attr, NULL, MLX5_GET(ads, path, flow_label),
				MLX5_GET(ads, path, src_addr_index),
				MLX5_GET(ads, path, hop_limit),
				MLX5_GET(ads, path, tclass));
		rdma_ah_set_dgid_raw(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip));
	}
}
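/*
 * Note on the static rate conversion above (explanatory, added for clarity):
 * the hardware field stores 0 for "no static rate limit" and otherwise the
 * IB rate enumeration shifted up by 5, which is why a non-zero stat_rate is
 * mapped back with "- 5".  For example, assuming that offset, a stored value
 * of 8 reads back as 3, i.e. IB_RATE_10_GBPS.
 */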
static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
					struct mlx5_ib_sq *sq,
					u8 *sq_state)
{
	int err;

	err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state);
	if (err)
		goto out;
	sq->state = *sq_state;

out:
	return err;
}
static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
					struct mlx5_ib_rq *rq,
					u8 *rq_state)
{
	void *out;
	void *rqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(query_rq_out);
	out = kvzalloc(inlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
	if (err)
		goto out;

	rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
	*rq_state = MLX5_GET(rqc, rqc, state);
	rq->state = *rq_state;

out:
	kvfree(out);
	return err;
}
static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
				  struct mlx5_ib_qp *qp, u8 *qp_state)
{
	static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
		[MLX5_RQC_STATE_RST] = {
			[MLX5_SQC_STATE_RST]	= IB_QPS_RESET,
			[MLX5_SQC_STATE_RDY]	= MLX5_QP_STATE_BAD,
			[MLX5_SQC_STATE_ERR]	= MLX5_QP_STATE_BAD,
			[MLX5_SQ_STATE_NA]	= IB_QPS_RESET,
		},
		[MLX5_RQC_STATE_RDY] = {
			[MLX5_SQC_STATE_RST]	= MLX5_QP_STATE,
			[MLX5_SQC_STATE_RDY]	= MLX5_QP_STATE,
			[MLX5_SQC_STATE_ERR]	= IB_QPS_SQE,
			[MLX5_SQ_STATE_NA]	= MLX5_QP_STATE,
		},
		[MLX5_RQC_STATE_ERR] = {
			[MLX5_SQC_STATE_RST]	= MLX5_QP_STATE_BAD,
			[MLX5_SQC_STATE_RDY]	= MLX5_QP_STATE_BAD,
			[MLX5_SQC_STATE_ERR]	= IB_QPS_ERR,
			[MLX5_SQ_STATE_NA]	= IB_QPS_ERR,
		},
		[MLX5_RQ_STATE_NA] = {
			[MLX5_SQC_STATE_RST]	= MLX5_QP_STATE,
			[MLX5_SQC_STATE_RDY]	= MLX5_QP_STATE,
			[MLX5_SQC_STATE_ERR]	= MLX5_QP_STATE,
			[MLX5_SQ_STATE_NA]	= MLX5_QP_STATE_BAD,
		},
	};

	*qp_state = sqrq_trans[rq_state][sq_state];

	if (*qp_state == MLX5_QP_STATE_BAD) {
		WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
		     qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
		     qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
		return -EINVAL;
	}

	if (*qp_state == MLX5_QP_STATE)
		*qp_state = qp->state;

	return 0;
}
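/*
 * Worked example of the sqrq_trans[] table above (explanatory, added for
 * clarity): an RQ in RDY combined with an SQ in ERR maps to IB_QPS_SQE,
 * while RDY/RDY yields the MLX5_QP_STATE sentinel, i.e. "the SQ/RQ pair
 * gives no definite answer, keep the software state already recorded in
 * qp->state".  MLX5_QP_STATE_BAD marks combinations the firmware should
 * never report and triggers the WARN() in sqrq_state_to_qp_state().
 */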
static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
				     struct mlx5_ib_qp *qp,
				     u8 *raw_packet_qp_state)
{
	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
	int err;
	u8 sq_state = MLX5_SQ_STATE_NA;
	u8 rq_state = MLX5_RQ_STATE_NA;

	if (qp->sq.wqe_cnt) {
		err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
		if (err)
			return err;
	}

	if (qp->rq.wqe_cnt) {
		err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
		if (err)
			return err;
	}

	return sqrq_state_to_qp_state(sq_state, rq_state, qp,
				      raw_packet_qp_state);
}
static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
			 struct ib_qp_attr *qp_attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
	void *qpc, *pri_path, *alt_path;
	u32 *outb;
	int err;

	outb = kzalloc(outlen, GFP_KERNEL);
	if (!outb)
		return -ENOMEM;

	err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen);
	if (err)
		goto out;

	qpc = MLX5_ADDR_OF(query_qp_out, outb, qpc);

	qp->state = to_ib_qp_state(MLX5_GET(qpc, qpc, state));
	if (MLX5_GET(qpc, qpc, state) == MLX5_QP_STATE_SQ_DRAINING)
		qp_attr->sq_draining = 1;

	qp_attr->path_mtu = MLX5_GET(qpc, qpc, mtu);
	qp_attr->path_mig_state = to_ib_mig_state(MLX5_GET(qpc, qpc, pm_state));
	qp_attr->qkey = MLX5_GET(qpc, qpc, q_key);
	qp_attr->rq_psn = MLX5_GET(qpc, qpc, next_rcv_psn);
	qp_attr->sq_psn = MLX5_GET(qpc, qpc, next_send_psn);
	qp_attr->dest_qp_num = MLX5_GET(qpc, qpc, remote_qpn);

	if (MLX5_GET(qpc, qpc, rre))
		qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
	if (MLX5_GET(qpc, qpc, rwe))
		qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE;
	if (MLX5_GET(qpc, qpc, rae))
		qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_ATOMIC;

	qp_attr->max_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_sra_max);
	qp_attr->max_dest_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_rra_max);
	qp_attr->min_rnr_timer = MLX5_GET(qpc, qpc, min_rnr_nak);
	qp_attr->retry_cnt = MLX5_GET(qpc, qpc, retry_count);
	qp_attr->rnr_retry = MLX5_GET(qpc, qpc, rnr_retry);

	pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
	alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);

	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
	    qp->ibqp.qp_type == IB_QPT_XRC_INI ||
	    qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
		to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path);
		to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path);
		qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index);
		qp_attr->alt_port_num = MLX5_GET(ads, alt_path, vhca_port_num);
	}

	qp_attr->pkey_index = MLX5_GET(ads, pri_path, pkey_index);
	qp_attr->port_num = MLX5_GET(ads, pri_path, vhca_port_num);
	qp_attr->timeout = MLX5_GET(ads, pri_path, ack_timeout);
	qp_attr->alt_timeout = MLX5_GET(ads, alt_path, ack_timeout);

out:
	kfree(outb);
	return err;
}
static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
				struct ib_qp_attr *qp_attr, int qp_attr_mask,
				struct ib_qp_init_attr *qp_init_attr)
{
	struct mlx5_core_dct *dct = &mqp->dct.mdct;
	u32 *out;
	u32 access_flags = 0;
	int outlen = MLX5_ST_SZ_BYTES(query_dct_out);
	void *dctc;
	int err;
	int supported_mask = IB_QP_STATE |
			     IB_QP_ACCESS_FLAGS |
			     IB_QP_PORT |
			     IB_QP_MIN_RNR_TIMER |
			     IB_QP_AV |
			     IB_QP_PATH_MTU |
			     IB_QP_PKEY_INDEX;

	if (qp_attr_mask & ~supported_mask)
		return -EINVAL;
	if (mqp->state != IB_QPS_RTR)
		return -EINVAL;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	err = mlx5_core_dct_query(dev, dct, out, outlen);
	if (err)
		goto out;

	dctc = MLX5_ADDR_OF(query_dct_out, out, dct_context_entry);

	if (qp_attr_mask & IB_QP_STATE)
		qp_attr->qp_state = IB_QPS_RTR;

	if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
		if (MLX5_GET(dctc, dctc, rre))
			access_flags |= IB_ACCESS_REMOTE_READ;
		if (MLX5_GET(dctc, dctc, rwe))
			access_flags |= IB_ACCESS_REMOTE_WRITE;
		if (MLX5_GET(dctc, dctc, rae))
			access_flags |= IB_ACCESS_REMOTE_ATOMIC;
		qp_attr->qp_access_flags = access_flags;
	}

	if (qp_attr_mask & IB_QP_PORT)
		qp_attr->port_num = MLX5_GET(dctc, dctc, port);
	if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
		qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
	if (qp_attr_mask & IB_QP_AV) {
		qp_attr->ah_attr.grh.traffic_class = MLX5_GET(dctc, dctc, tclass);
		qp_attr->ah_attr.grh.flow_label = MLX5_GET(dctc, dctc, flow_label);
		qp_attr->ah_attr.grh.sgid_index = MLX5_GET(dctc, dctc, my_addr_index);
		qp_attr->ah_attr.grh.hop_limit = MLX5_GET(dctc, dctc, hop_limit);
	}
	if (qp_attr_mask & IB_QP_PATH_MTU)
		qp_attr->path_mtu = MLX5_GET(dctc, dctc, mtu);
	if (qp_attr_mask & IB_QP_PKEY_INDEX)
		qp_attr->pkey_index = MLX5_GET(dctc, dctc, pkey_index);

out:
	kfree(out);
	return err;
}
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	int err = 0;
	u8 raw_packet_qp_state;

	if (ibqp->rwq_ind_tbl)
		return -ENOSYS;

	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
		return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
					    qp_init_attr);

	/* Not all of output fields are applicable, make sure to zero them */
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
	memset(qp_attr, 0, sizeof(*qp_attr));

	if (unlikely(qp->type == MLX5_IB_QPT_DCT))
		return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
					    qp_attr_mask, qp_init_attr);

	mutex_lock(&qp->mutex);

	if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
	    qp->flags & IB_QP_CREATE_SOURCE_QPN) {
		err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
		if (err)
			goto out;
		qp->state = raw_packet_qp_state;
		qp_attr->port_num = 1;
	} else {
		err = query_qp_attr(dev, qp, qp_attr);
		if (err)
			goto out;
	}

	qp_attr->qp_state	     = qp->state;
	qp_attr->cur_qp_state	     = qp_attr->qp_state;
	qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;

	if (!ibqp->uobject) {
		qp_attr->cap.max_send_wr  = qp->sq.max_post;
		qp_attr->cap.max_send_sge = qp->sq.max_gs;
		qp_init_attr->qp_context = ibqp->qp_context;
	} else {
		qp_attr->cap.max_send_wr  = 0;
		qp_attr->cap.max_send_sge = 0;
	}

	qp_init_attr->qp_type = ibqp->qp_type;
	qp_init_attr->recv_cq = ibqp->recv_cq;
	qp_init_attr->send_cq = ibqp->send_cq;
	qp_init_attr->srq = ibqp->srq;
	qp_attr->cap.max_inline_data = qp->max_inline_data;

	qp_init_attr->cap	     = qp_attr->cap;

	qp_init_attr->create_flags = qp->flags;

	qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;

out:
	mutex_unlock(&qp->mutex);
	return err;
}
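/*
 * Illustrative sketch (not part of the driver): kernel consumers reach this
 * entry point through ib_query_qp(); the attribute names below are standard
 * verbs fields and the mask is just an example:
 *
 *	struct ib_qp_attr attr;
 *	struct ib_qp_init_attr init_attr;
 *
 *	ret = ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_CAP, &init_attr);
 *	if (!ret)
 *		pr_debug("qp state %d, max_recv_wr %u\n",
 *			 attr.qp_state, attr.cap.max_recv_wr);
 */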
int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
	struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);

	if (!MLX5_CAP_GEN(dev->mdev, xrc))
		return -EOPNOTSUPP;

	return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
	u32 xrcdn = to_mxrcd(xrcd)->xrcdn;

	return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
{
	struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp);
	struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device);
	struct ib_event event;

	if (rwq->ibwq.event_handler) {
		event.device     = rwq->ibwq.device;
		event.element.wq = &rwq->ibwq;
		switch (type) {
		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
			event.event = IB_EVENT_WQ_FATAL;
			break;
		default:
			mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn);
			return;
		}

		rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context);
	}
}
static int set_delay_drop(struct mlx5_ib_dev *dev)
{
	int err = 0;

	mutex_lock(&dev->delay_drop.lock);
	if (dev->delay_drop.activate)
		goto out;

	err = mlx5_core_set_delay_drop(dev, dev->delay_drop.timeout);
	if (err)
		goto out;

	dev->delay_drop.activate = true;
out:
	mutex_unlock(&dev->delay_drop.lock);

	if (!err)
		atomic_inc(&dev->delay_drop.rqs_cnt);
	return err;
}
static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
		      struct ib_wq_init_attr *init_attr)
{
	struct mlx5_ib_dev *dev;
	int has_net_offloads;
	__be64 *rq_pas0;
	void *in;
	void *rqc;
	void *wq;
	int inlen;
	int err;

	dev = to_mdev(pd->device);

	inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
	MLX5_SET(rqc,  rqc, mem_rq_type,
		 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
	MLX5_SET(rqc, rqc, user_index, rwq->user_index);
	MLX5_SET(rqc,  rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
	MLX5_SET(rqc,  rqc, state, MLX5_RQC_STATE_RST);
	MLX5_SET(rqc,  rqc, flush_in_error_en, 1);
	wq = MLX5_ADDR_OF(rqc, rqc, wq);
	MLX5_SET(wq, wq, wq_type,
		 rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ?
		 MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC);
	if (init_attr->create_flags & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
		if (!MLX5_CAP_GEN(dev->mdev, end_pad)) {
			mlx5_ib_dbg(dev, "Scatter end padding is not supported\n");
			err = -EOPNOTSUPP;
			goto out;
		} else {
			MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
		}
	}
	MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
	if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) {
		/*
		 * In Firmware number of strides in each WQE is:
		 *   "512 * 2^single_wqe_log_num_of_strides"
		 * Values 3 to 8 are accepted as 10 to 15, 9 to 18 are
		 * accepted as 0 to 9
		 */
		static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
					     2,  3,  4,  5,  6,  7,  8, 9 };
		MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en);
		MLX5_SET(wq, wq, log_wqe_stride_size,
			 rwq->single_stride_log_num_of_bytes -
			 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES);
		MLX5_SET(wq, wq, log_wqe_num_of_strides,
			 fw_map[rwq->log_num_strides -
				MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]);
	}
	MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
	MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
	MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
	MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
	MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
	MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
	has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
	if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
		if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
			mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
			err = -EOPNOTSUPP;
			goto out;
		}
	} else {
		MLX5_SET(rqc, rqc, vsd, 1);
	}
	if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
		if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
			mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
			err = -EOPNOTSUPP;
			goto out;
		}
		MLX5_SET(rqc, rqc, scatter_fcs, 1);
	}
	if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
		if (!(dev->ib_dev.attrs.raw_packet_caps &
		      IB_RAW_PACKET_CAP_DELAY_DROP)) {
			mlx5_ib_dbg(dev, "Delay drop is not supported\n");
			err = -EOPNOTSUPP;
			goto out;
		}
		MLX5_SET(rqc, rqc, delay_drop_en, 1);
	}
	rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
	mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0);
	err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
	if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
		err = set_delay_drop(dev);
		if (err) {
			mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
				     err);
			mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
		} else {
			rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
		}
	}
out:
	kvfree(in);
	return err;
}
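/*
 * Worked example for the fw_map[] table in create_rq() above (explanatory,
 * added for clarity; it assumes the usual macro values of 3 for
 * MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES and 9 for
 * MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES): a requested log_num_strides of 9
 * indexes fw_map[9 - 3] = 0, while the extended minimum of 3 indexes
 * fw_map[0] = 10, matching the in-code comment "values 3 to 8 are accepted
 * as 10 to 15, 9 to 18 are accepted as 0 to 9".
 */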
static int  set_user_rq_size(struct mlx5_ib_dev *dev,
			     struct ib_wq_init_attr *wq_init_attr,
			     struct mlx5_ib_create_wq *ucmd,
			     struct mlx5_ib_rwq *rwq)
{
	/* Sanity check RQ size before proceeding */
	if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
		return -EINVAL;

	if (!ucmd->rq_wqe_count)
		return -EINVAL;

	rwq->wqe_count = ucmd->rq_wqe_count;
	rwq->wqe_shift = ucmd->rq_wqe_shift;
	if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size))
		return -EINVAL;

	rwq->log_rq_stride = rwq->wqe_shift;
	rwq->log_rq_size = ilog2(rwq->wqe_count);
	return 0;
}
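/*
 * Worked example for set_user_rq_size() (explanatory, arbitrary numbers):
 * a user request of rq_wqe_count = 256 and rq_wqe_shift = 6 (64-byte WQEs)
 * gives buf_size = 256 << 6 = 16384 bytes, log_rq_stride = 6 and
 * log_rq_size = ilog2(256) = 8; check_shl_overflow() rejects requests whose
 * shifted size would overflow rwq->buf_size.
 */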
static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides)
{
	if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
	    (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
		return false;

	if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) &&
	    (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
		return false;

	return true;
}
static int prepare_user_rq(struct ib_pd *pd,
			   struct ib_wq_init_attr *init_attr,
			   struct ib_udata *udata,
			   struct mlx5_ib_rwq *rwq)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_create_wq ucmd = {};
	int err;
	size_t required_cmd_sz;

	required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
				      single_stride_log_num_of_bytes);
	if (udata->inlen < required_cmd_sz) {
		mlx5_ib_dbg(dev, "invalid inlen\n");
		return -EINVAL;
	}

	if (udata->inlen > sizeof(ucmd) &&
	    !ib_is_udata_cleared(udata, sizeof(ucmd),
				 udata->inlen - sizeof(ucmd))) {
		mlx5_ib_dbg(dev, "inlen is not supported\n");
		return -EOPNOTSUPP;
	}

	if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
		mlx5_ib_dbg(dev, "copy failed\n");
		return -EFAULT;
	}

	if (ucmd.comp_mask & (~MLX5_IB_CREATE_WQ_STRIDING_RQ)) {
		mlx5_ib_dbg(dev, "invalid comp mask\n");
		return -EOPNOTSUPP;
	} else if (ucmd.comp_mask & MLX5_IB_CREATE_WQ_STRIDING_RQ) {
		if (!MLX5_CAP_GEN(dev->mdev, striding_rq)) {
			mlx5_ib_dbg(dev, "Striding RQ is not supported\n");
			return -EOPNOTSUPP;
		}
		if ((ucmd.single_stride_log_num_of_bytes <
		     MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES) ||
		    (ucmd.single_stride_log_num_of_bytes >
		     MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES)) {
			mlx5_ib_dbg(dev, "Invalid log stride size (%u. Range is %u - %u)\n",
				    ucmd.single_stride_log_num_of_bytes,
				    MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES,
				    MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES);
			return -EINVAL;
		}
		if (!log_of_strides_valid(dev,
					  ucmd.single_wqe_log_num_of_strides)) {
			mlx5_ib_dbg(
				dev,
				"Invalid log num strides (%u. Range is %u - %u)\n",
				ucmd.single_wqe_log_num_of_strides,
				MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ?
					MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES :
					MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
				MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
			return -EINVAL;
		}
		rwq->single_stride_log_num_of_bytes =
			ucmd.single_stride_log_num_of_bytes;
		rwq->log_num_strides = ucmd.single_wqe_log_num_of_strides;
		rwq->two_byte_shift_en = !!ucmd.two_byte_shift_en;
		rwq->create_flags |= MLX5_IB_WQ_FLAGS_STRIDING_RQ;
	}

	err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	err = create_user_rq(dev, pd, udata, rwq, &ucmd);
	if (err) {
		mlx5_ib_dbg(dev, "err %d\n", err);
		return err;
	}

	rwq->user_index = ucmd.user_index;
	return 0;
}
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
				struct ib_wq_init_attr *init_attr,
				struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev;
	struct mlx5_ib_rwq *rwq;
	struct mlx5_ib_create_wq_resp resp = {};
	size_t min_resp_len;
	int err;

	if (!udata)
		return ERR_PTR(-ENOSYS);

	min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
	if (udata->outlen && udata->outlen < min_resp_len)
		return ERR_PTR(-EINVAL);

	if (!capable(CAP_SYS_RAWIO) &&
	    init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
		return ERR_PTR(-EPERM);

	dev = to_mdev(pd->device);
	switch (init_attr->wq_type) {
	case IB_WQT_RQ:
		rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
		if (!rwq)
			return ERR_PTR(-ENOMEM);
		err = prepare_user_rq(pd, init_attr, udata, rwq);
		if (err)
			goto err;
		err = create_rq(rwq, pd, init_attr);
		if (err)
			goto err_user_rq;
		break;
	default:
		mlx5_ib_dbg(dev, "unsupported wq type %d\n",
			    init_attr->wq_type);
		return ERR_PTR(-EINVAL);
	}

	rwq->ibwq.wq_num = rwq->core_qp.qpn;
	rwq->ibwq.state = IB_WQS_RESET;
	if (udata->outlen) {
		resp.response_length = offsetofend(
			struct mlx5_ib_create_wq_resp, response_length);
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err)
			goto err_copy;
	}

	rwq->core_qp.event = mlx5_ib_wq_event;
	rwq->ibwq.event_handler = init_attr->event_handler;
	return &rwq->ibwq;

err_copy:
	mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
err_user_rq:
	destroy_user_rq(dev, pd, rwq, udata);
err:
	kfree(rwq);
	return ERR_PTR(err);
}
int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(wq->device);
	struct mlx5_ib_rwq *rwq = to_mrwq(wq);
	int ret;

	ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
	if (ret)
		return ret;
	destroy_user_rq(dev, wq->pd, rwq, udata);
	kfree(rwq);
	return 0;
}
int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
				 struct ib_rwq_ind_table_init_attr *init_attr,
				 struct ib_udata *udata)
{
	struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
		to_mrwq_ind_table(ib_rwq_ind_table);
	struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
	int sz = 1 << init_attr->log_ind_tbl_size;
	struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
	size_t min_resp_len;
	int inlen;
	int err;
	int i;
	u32 *in;
	void *rqtc;

	if (udata->inlen > 0 &&
	    !ib_is_udata_cleared(udata, 0,
				 udata->inlen))
		return -EOPNOTSUPP;

	if (init_attr->log_ind_tbl_size >
	    MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
		mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
			    init_attr->log_ind_tbl_size,
			    MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
		return -EINVAL;
	}

	min_resp_len =
		offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
	if (udata->outlen && udata->outlen < min_resp_len)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	for (i = 0; i < sz; i++)
		MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);

	rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
	MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);

	err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
	kvfree(in);
	if (err)
		return err;

	rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
	if (udata->outlen) {
		resp.response_length =
			offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
				    response_length);
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err)
			goto err_copy;
	}

	return 0;

err_copy:
	mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
	return err;
}
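/*
 * Illustrative sketch (not part of the driver): an RSS consumer creates
 * several receive WQs and then an indirection table whose 2^log_ind_tbl_size
 * slots each name a WQ, which is exactly what the rq_num[i] loop above
 * programs into the RQT.  Today this path is normally exercised from
 * userspace via uverbs; the structure being filled in looks like:
 *
 *	struct ib_wq *wqs[4];	// previously created receive WQs
 *	struct ib_rwq_ind_table_init_attr ind_attr = {
 *		.log_ind_tbl_size = 2,	// 4 entries
 *		.ind_tbl	  = wqs,
 *	};
 */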
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
{
	struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
	struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);

	return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
}
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
		      u32 wq_attr_mask, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(wq->device);
	struct mlx5_ib_rwq *rwq = to_mrwq(wq);
	struct mlx5_ib_modify_wq ucmd = {};
	size_t required_cmd_sz;
	int curr_wq_state;
	int wq_state;
	int inlen;
	int err;
	void *rqc;
	void *in;

	required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
	if (udata->inlen < required_cmd_sz)
		return -EINVAL;

	if (udata->inlen > sizeof(ucmd) &&
	    !ib_is_udata_cleared(udata, sizeof(ucmd),
				 udata->inlen - sizeof(ucmd)))
		return -EOPNOTSUPP;

	if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
		return -EFAULT;

	if (ucmd.comp_mask || ucmd.reserved)
		return -EOPNOTSUPP;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

	curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
		wq_attr->curr_wq_state : wq->state;
	wq_state = (wq_attr_mask & IB_WQ_STATE) ?
		wq_attr->wq_state : curr_wq_state;
	if (curr_wq_state == IB_WQS_ERR)
		curr_wq_state = MLX5_RQC_STATE_ERR;
	if (wq_state == IB_WQS_ERR)
		wq_state = MLX5_RQC_STATE_ERR;
	MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
	MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
	MLX5_SET(rqc, rqc, state, wq_state);

	if (wq_attr_mask & IB_WQ_FLAGS) {
		if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
			if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
			      MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
				mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
				err = -EOPNOTSUPP;
				goto out;
			}
			MLX5_SET64(modify_rq_in, in, modify_bitmask,
				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
			MLX5_SET(rqc, rqc, vsd,
				 (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
		}

		if (wq_attr->flags_mask & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
			mlx5_ib_dbg(dev, "Modifying scatter end padding is not supported\n");
			err = -EOPNOTSUPP;
			goto out;
		}
	}

	if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
		u16 set_id;

		set_id = mlx5_ib_get_counters_id(dev, 0);
		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
			MLX5_SET64(modify_rq_in, in, modify_bitmask,
				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
			MLX5_SET(rqc, rqc, counter_set_id, set_id);
		} else
			dev_info_once(
				&dev->ib_dev.dev,
				"Receive WQ counters are not supported on current FW\n");
	}

	err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in);
	if (!err)
		rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;

out:
	kvfree(in);
	return err;
}
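/*
 * Illustrative sketch (not part of the driver): a WQ starts in IB_WQS_RESET
 * and is moved to RDY through the core ib_modify_wq() helper, which is what
 * lands in the RST->RDY branch above (where the default counter set is also
 * attached when the firmware supports it):
 *
 *	struct ib_wq_attr wq_attr = { .wq_state = IB_WQS_RDY };
 *
 *	ret = ib_modify_wq(wq, &wq_attr, IB_WQ_STATE);
 */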
struct mlx5_ib_drain_cqe {
	struct ib_cqe cqe;
	struct completion done;
};

static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
						     struct mlx5_ib_drain_cqe,
						     cqe);

	complete(&cqe->done);
}
/* This function returns only once the drained WR was completed */
static void handle_drain_completion(struct ib_cq *cq,
				    struct mlx5_ib_drain_cqe *sdrain,
				    struct mlx5_ib_dev *dev)
{
	struct mlx5_core_dev *mdev = dev->mdev;

	if (cq->poll_ctx == IB_POLL_DIRECT) {
		while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
			ib_process_cq_direct(cq, -1);
		return;
	}

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		struct mlx5_ib_cq *mcq = to_mcq(cq);
		bool triggered = false;
		unsigned long flags;

		spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
		/* Make sure that the CQ handler won't run if wasn't run yet */
		if (!mcq->mcq.reset_notify_added)
			mcq->mcq.reset_notify_added = 1;
		else
			triggered = true;
		spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);

		if (triggered) {
			/* Wait for any scheduled/running task to be ended */
			switch (cq->poll_ctx) {
			case IB_POLL_SOFTIRQ:
				irq_poll_disable(&cq->iop);
				irq_poll_enable(&cq->iop);
				break;
			case IB_POLL_WORKQUEUE:
				cancel_work_sync(&cq->work);
				break;
			default:
				WARN_ON_ONCE(1);
			}
		}

		/* Run the CQ handler - this makes sure that the drain WR will
		 * be processed if wasn't processed yet.
		 */
		mcq->mcq.comp(&mcq->mcq, NULL);
	}

	wait_for_completion(&sdrain->done);
}
void mlx5_ib_drain_sq(struct ib_qp *qp)
{
	struct ib_cq *cq = qp->send_cq;
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	struct mlx5_ib_drain_cqe sdrain;
	const struct ib_send_wr *bad_swr;
	struct ib_rdma_wr swr = {
		.wr = {
			.next = NULL,
			{ .wr_cqe	= &sdrain.cqe, },
			.opcode	= IB_WR_RDMA_WRITE,
		},
	};
	int ret;
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_core_dev *mdev = dev->mdev;

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
		return;
	}

	sdrain.cqe.done = mlx5_ib_drain_qp_done;
	init_completion(&sdrain.done);

	ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr);
	if (ret) {
		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
		return;
	}

	handle_drain_completion(cq, &sdrain, dev);
}
void mlx5_ib_drain_rq(struct ib_qp *qp)
{
	struct ib_cq *cq = qp->recv_cq;
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	struct mlx5_ib_drain_cqe rdrain;
	struct ib_recv_wr rwr = {};
	const struct ib_recv_wr *bad_rwr;
	int ret;
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_core_dev *mdev = dev->mdev;

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
		return;
	}

	rwr.wr_cqe = &rdrain.cqe;
	rdrain.cqe.done = mlx5_ib_drain_qp_done;
	init_completion(&rdrain.done);

	ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr);
	if (ret) {
		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
		return;
	}

	handle_drain_completion(cq, &rdrain, dev);
}
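/*
 * Illustrative sketch (not part of the driver): ULPs do not call the two
 * drain hooks above directly; they use the core helpers, which dispatch to
 * mlx5_ib_drain_sq()/mlx5_ib_drain_rq() so that draining still completes
 * when the device is in internal error:
 *
 *	ib_drain_sq(qp);	// flush outstanding send work requests
 *	ib_drain_qp(qp);	// flush both SQ and RQ, e.g. before ib_destroy_qp()
 */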
/*
 * Bind a qp to a counter. If @counter is NULL then bind the qp to
 * the default counter
 */
int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_qp *mqp = to_mqp(qp);
	int err = 0;

	mutex_lock(&mqp->mutex);
	if (mqp->state == IB_QPS_RESET) {
		qp->counter = counter;
		goto out;
	}

	if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (mqp->state == IB_QPS_RTS) {
		err = __mlx5_ib_qp_set_counter(qp, counter);
		if (!err)
			qp->counter = counter;

		goto out;
	}

	mqp->counter_pending = 1;
	qp->counter = counter;

out:
	mutex_unlock(&mqp->mutex);
	return err;
}
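/*
 * Illustrative note (not part of the driver): per-QP counter binding is
 * normally driven from userspace through the rdma netlink statistics
 * interface rather than by kernel callers, e.g. with iproute2 (commands
 * shown as an example of the usual workflow, not verified here):
 *
 *	rdma statistic qp set link mlx5_0/1 auto type on
 *	rdma statistic qp bind link mlx5_0/1 lqpn 178
 *
 * Those requests reach mlx5_ib_qp_set_counter() through the device's
 * counter bind/unbind ops; when the QP is already in RTS the driver issues
 * an RTS2RTS modify to retarget the counter set, otherwise the binding is
 * deferred via counter_pending.
 */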