// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 Intel Corporation.
 *
 */
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"

#define IB_BTHE_E                 BIT(IB_BTHE_E_SHIFT)

#define OPFN_CODE(code) BIT((code) - 1)
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
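/*
 * Each extended verbs capability is identified by a small code and tracked
 * as a single bit: for example, OPFN_MASK(TID_RDMA) expands to
 * OPFN_CODE(STL_VERBS_EXTD_TID_RDMA), i.e. BIT(STL_VERBS_EXTD_TID_RDMA - 1),
 * which is the bit used in opfn.requested and opfn.completed below.
 */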
struct hfi1_opfn_type {
        bool (*request)(struct rvt_qp *qp, u64 *data);
        bool (*response)(struct rvt_qp *qp, u64 *data);
        bool (*reply)(struct rvt_qp *qp, u64 data);
        void (*error)(struct rvt_qp *qp);
};
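/*
 * Per-capability handlers: request builds the requester's payload,
 * response handles an incoming request on the responder, reply consumes
 * the responder's answer on the requester, and error invalidates the
 * capability when negotiation must be torn down.
 */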
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
        [STL_VERBS_EXTD_TID_RDMA] = {
                .request = tid_rdma_conn_req,
                .response = tid_rdma_conn_resp,
                .reply = tid_rdma_conn_reply,
                .error = tid_rdma_conn_error,
        },
};
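/*
 * Dedicated workqueue for posting OPFN requests; see
 * opfn_schedule_conn_request() for why some callers cannot post directly.
 */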
static struct workqueue_struct *opfn_wq;
static void opfn_schedule_conn_request(struct rvt_qp *qp);
static bool hfi1_opfn_extended(u32 bth1)
{
        return !!(bth1 & IB_BTHE_E);
}
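/*
 * Post an OPFN connection request for the lowest-numbered capability that
 * has been requested but not yet completed.  opfn.curr records the
 * capability code currently being negotiated, and the code itself is
 * carried in the low nibble of the 64-bit atomic payload.
 */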
static void opfn_conn_request(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct ib_atomic_wr wr;
        u16 mask, capcode;
        struct hfi1_opfn_type *extd;
        u64 data;
        unsigned long flags;
        int ret = 0;

        trace_hfi1_opfn_state_conn_request(qp);
        spin_lock_irqsave(&priv->opfn.lock, flags);
        /*
         * Exit if the extended bit is not set, or if nothing is requested, or
         * if we have completed all requests, or if a previous request is in
         * progress
         */
        if (!priv->opfn.extended || !priv->opfn.requested ||
            priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
                goto done;

        /* Isolate the lowest pending bit and convert it back to a code */
        mask = priv->opfn.requested & ~priv->opfn.completed;
        capcode = ilog2(mask & ~(mask - 1)) + 1;
        if (capcode >= STL_VERBS_EXTD_MAX) {
                priv->opfn.completed |= OPFN_CODE(capcode);
                goto done;
        }

        extd = &hfi1_opfn_handlers[capcode];
        if (!extd || !extd->request || !extd->request(qp, &data)) {
                /*
                 * Either there is no handler for this capability or the request
                 * packet could not be generated. Either way, mark it as done so
                 * we don't keep attempting to complete it.
                 */
                priv->opfn.completed |= OPFN_CODE(capcode);
                goto done;
        }

        trace_hfi1_opfn_data_conn_request(qp, capcode, data);
        data = (data & ~0xf) | capcode;

        memset(&wr, 0, sizeof(wr));
        wr.wr.opcode = IB_WR_OPFN;
        wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
        wr.compare_add = data;

        priv->opfn.curr = capcode;      /* A new request is now in progress */
        /* Drop opfn.lock before calling ib_post_send() */
        spin_unlock_irqrestore(&priv->opfn.lock, flags);

        ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
        if (ret)
                goto err;
        trace_hfi1_opfn_state_conn_request(qp);
        return;
err:
        trace_hfi1_msg_opfn_conn_request(qp, "ib_post_send failed: ret = ",
                                         (u64)ret);
        spin_lock_irqsave(&priv->opfn.lock, flags);
        /*
         * In case of an unexpected error return from ib_post_send
         * clear opfn.curr and reschedule to try again
         */
        priv->opfn.curr = STL_VERBS_EXTD_NONE;
        opfn_schedule_conn_request(qp);
done:
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_send_conn_request(struct work_struct *work)
{
        struct hfi1_opfn_data *od;
        struct hfi1_qp_priv *qpriv;

        od = container_of(work, struct hfi1_opfn_data, opfn_work);
        qpriv = container_of(od, struct hfi1_qp_priv, opfn);

        opfn_conn_request(qpriv->owner);
}
/*
 * When the QP s_lock is held in the caller, the OPFN request must be
 * scheduled to a different workqueue to avoid double locking the QP s_lock
 * in the call to ib_post_send() in opfn_conn_request().
 */
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        trace_hfi1_opfn_state_sched_conn_request(qp);
        queue_work(opfn_wq, &priv->opfn.opfn_work);
}
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
                        struct ib_atomic_eth *ateth)
{
        struct hfi1_qp_priv *priv = qp->priv;
        u64 data = be64_to_cpu(ateth->compare_data);
        struct hfi1_opfn_type *extd;
        u8 capcode;
        unsigned long flags;

        trace_hfi1_opfn_state_conn_response(qp);
        capcode = data & 0xf;
        trace_hfi1_opfn_data_conn_response(qp, capcode, data);
        if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
                return;

        extd = &hfi1_opfn_handlers[capcode];

        if (!extd || !extd->response) {
                e->atomic_data = capcode;
                return;
        }

        spin_lock_irqsave(&priv->opfn.lock, flags);
        if (priv->opfn.completed & OPFN_CODE(capcode)) {
                /*
                 * We are receiving a request for a feature that has already
                 * been negotiated. This may mean that the other side has reset.
                 */
                priv->opfn.completed &= ~OPFN_CODE(capcode);
                if (extd->error)
                        extd->error(qp);
        }

        if (extd->response(qp, &data))
                priv->opfn.completed |= OPFN_CODE(capcode);
        e->atomic_data = (data & ~0xf) | capcode;
        trace_hfi1_opfn_state_conn_response(qp);
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_opfn_type *extd;
        u8 capcode;
        unsigned long flags;

        trace_hfi1_opfn_state_conn_reply(qp);
        capcode = data & 0xf;
        trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
        if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
                return;

        spin_lock_irqsave(&priv->opfn.lock, flags);
        /*
         * Either there is no previous request or the reply is not for the
         * current request
         */
        if (!priv->opfn.curr || capcode != priv->opfn.curr)
                goto done;

        extd = &hfi1_opfn_handlers[capcode];

        if (!extd || !extd->reply)
                goto clear;

        if (extd->reply(qp, data))
                priv->opfn.completed |= OPFN_CODE(capcode);
clear:
        /*
         * Clear opfn.curr to indicate that the previous request is no longer
         * in progress
         */
        priv->opfn.curr = STL_VERBS_EXTD_NONE;
        trace_hfi1_opfn_state_conn_reply(qp);
done:
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_error(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_opfn_type *extd = NULL;
        unsigned long flags;
        u16 capcode;

        trace_hfi1_opfn_state_conn_error(qp);
        trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
        /*
         * The QP has gone into the Error state. We have to invalidate all
         * negotiated features, including the one in progress (if any). The RC
         * QP handling will clean the WQE for the connection request.
         */
        spin_lock_irqsave(&priv->opfn.lock, flags);
        while (priv->opfn.completed) {
                capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
                extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
                if (extd->error)
                        extd->error(qp);
                priv->opfn.completed &= ~OPFN_CODE(capcode);
        }
        priv->opfn.extended = 0;
        priv->opfn.requested = 0;
        priv->opfn.curr = STL_VERBS_EXTD_NONE;
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
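/*
 * Update OPFN state from the QP attributes being applied.  For RC QPs with
 * TID RDMA enabled and a sufficiently large MTU, prime the local TID RDMA
 * parameters and, on a transition to RTS, mark TID RDMA as requested so
 * negotiation (or renegotiation) can be scheduled.
 */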
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_qp_priv *priv = qp->priv;
        unsigned long flags;

        if (attr_mask & IB_QP_RETRY_CNT)
                priv->s_retry = attr->retry_cnt;

        spin_lock_irqsave(&priv->opfn.lock, flags);
        if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
                struct tid_rdma_params *local = &priv->tid_rdma.local;

                if (attr_mask & IB_QP_TIMEOUT)
                        priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
                if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
                    qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
                        tid_rdma_opfn_init(qp, local);
                        /*
                         * We only want to set the OPFN requested bit when the
                         * QP transitions to RTS.
                         */
                        if (attr_mask & IB_QP_STATE &&
                            attr->qp_state == IB_QPS_RTS) {
                                priv->opfn.requested |= OPFN_MASK(TID_RDMA);
                                /*
                                 * If the QP is transitioning to RTS and the
                                 * opfn.completed for TID RDMA has already been
                                 * set, the QP is being moved *back* into RTS.
                                 * We can now renegotiate the TID RDMA
                                 * parameters.
                                 */
                                if (priv->opfn.completed &
                                    OPFN_MASK(TID_RDMA)) {
                                        priv->opfn.completed &=
                                                ~OPFN_MASK(TID_RDMA);
                                        /*
                                         * Since the opfn.completed bit was
                                         * already set, it is safe to assume
                                         * that the opfn.extended is also set.
                                         */
                                        opfn_schedule_conn_request(qp);
                                }
                        }
                } else {
                        memset(local, 0, sizeof(*local));
                }
        }
        spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
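/*
 * Called for an incoming packet: if the peer set the extended bit in the
 * BTH and OPFN is enabled locally, remember that the peer supports OPFN
 * and, if the QP is already in RTS, start a connection request right away.
 */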
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
        struct hfi1_qp_priv *priv = qp->priv;

        if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
            HFI1_CAP_IS_KSET(OPFN)) {
                priv->opfn.extended = 1;
                if (qp->state == IB_QPS_RTS)
                        opfn_conn_request(qp);
        }
}
int opfn_init(void)
{
        opfn_wq = alloc_workqueue("hfi_opfn",
                                  WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
                                  WQ_MEM_RECLAIM,
                                  HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
        if (!opfn_wq)
                return -ENOMEM;

        return 0;
}

void opfn_exit(void)
{
        if (opfn_wq) {
                destroy_workqueue(opfn_wq);
                opfn_wq = NULL;
        }
}