// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Work Requests exploiting Infiniband API
 *
 * Work requests (WR) of type ib_post_send or ib_post_recv respectively
 * are submitted to either RC SQ or RC RQ respectively
 * (reliably connected send/receive queue)
 * and become work queue entries (WQEs).
 * While an SQ WR/WQE is pending, we track it until transmission completion.
 * Through a send or receive completion queue (CQ) respectively,
 * we get completion queue entries (CQEs) [aka work completions (WCs)].
 * Since the CQ callback is called from IRQ context, we split work by using
 * bottom halves implemented by tasklets.
 *
 * SMC uses this to exchange LLC (link layer control)
 * and CDC (connection data control) messages.
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Steffen Maier <maier@linux.vnet.ibm.com>
 */

#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <asm/div64.h>

#include "smc.h"
#include "smc_wr.h"

#define SMC_WR_MAX_POLL_CQE 10	/* max. # of compl. queue elements in 1 poll */

#define SMC_WR_RX_HASH_BITS 4

static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);
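
/* smc_wr_rx_hash maps the one-byte message type in struct smc_wr_rx_hdr to its
 * receive handler; it is filled once via smc_wr_rx_register_handler() and is
 * only read afterwards (see smc_wr_rx_demultiplex() below).
 */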

struct smc_wr_tx_pend {	/* control data for a pending send request */
	u64			wr_id;		/* work request id sent */
	smc_wr_tx_handler	handler;
	enum ib_wc_status	wc_status;	/* CQE status */
	struct smc_link		*link;
	u32			idx;
	struct smc_wr_tx_pend_priv priv;
};
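
/* A pending entry lives at the same array index as the send buffer and send
 * work requests it tracks, so a completion handler can get back from the
 * embedded .priv (via container_of()) to the complete per-slot state via .idx.
 */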

/******************************** send queue *********************************/

/*------------------------------- completion --------------------------------*/

/* returns true if at least one tx work request is pending on the given link */
static inline bool smc_wr_is_tx_pend(struct smc_link *link)
{
	if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) !=
							link->wr_tx_cnt) {
		return true;
	}
	return false;
}

/* wait till all pending tx work requests on the given link are completed */
static inline int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
{
	if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
			       SMC_WR_TX_WAIT_PENDING_TIME))
		return 0;
	else /* timeout */
		return -EBUSY;
}

static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
{
	u32 i;

	for (i = 0; i < link->wr_tx_cnt; i++) {
		if (link->wr_tx_pends[i].wr_id == wr_id)
			return i;
	}
	return link->wr_tx_cnt;
}

static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
{
	struct smc_wr_tx_pend pnd_snd;
	struct smc_link *link;
	u32 pnd_snd_idx;
	int i;

	link = wc->qp->qp_context;

	if (wc->opcode == IB_WC_REG_MR) {
		if (wc->status)
			link->wr_reg_state = FAILED;
		else
			link->wr_reg_state = CONFIRMED;
		smc_wr_wakeup_reg_wait(link);
		return;
	}

	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
	if (pnd_snd_idx == link->wr_tx_cnt)
		return;
	link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
	memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
	/* clear the full struct smc_wr_tx_pend including .priv */
	memset(&link->wr_tx_pends[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_pends[pnd_snd_idx]));
	memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
	       sizeof(link->wr_tx_bufs[pnd_snd_idx]));
	if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
		return;
	if (wc->status) {
		for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
			/* clear full struct smc_wr_tx_pend including .priv */
			memset(&link->wr_tx_pends[i], 0,
			       sizeof(link->wr_tx_pends[i]));
			memset(&link->wr_tx_bufs[i], 0,
			       sizeof(link->wr_tx_bufs[i]));
			clear_bit(i, link->wr_tx_mask);
		}
		/* terminate connections of this link group abnormally */
		smc_lgr_terminate_sched(smc_get_lgr(link));
	}
	if (pnd_snd.handler)
		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
	wake_up(&link->wr_tx_wait);
}

static void smc_wr_tx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int i = 0, rc;
	int polled = 0;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_send,
					 IB_CQ_NEXT_COMP |
					 IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		for (i = 0; i < rc; i++)
			smc_wr_tx_process_cqe(&wc[i]);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->send_tasklet);
}
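
/* Send completion flow (sketch): the IB core calls smc_wr_tx_cq_handler() in
 * IRQ context, which only schedules send_tasklet; smc_wr_tx_tasklet_fn() then
 * drains the CQ with ib_poll_cq() and hands every CQE to
 * smc_wr_tx_process_cqe(), which runs the pending slot's handler and frees
 * the slot.
 */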

/*---------------------------- request submission ---------------------------*/

static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
	*idx = link->wr_tx_cnt;
	for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
		if (!test_and_set_bit(*idx, link->wr_tx_mask))
			return 0;
	}
	*idx = link->wr_tx_cnt;
	return -EBUSY;
}

/**
 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
 *			and sets info for pending transmit tracking
 * @link:		Pointer to smc_link used to later send the message.
 * @handler:		Send completion handler function pointer.
 * @wr_buf:		Out value returns pointer to message buffer.
 * @wr_rdma_buf:	Out value returns pointer to rdma work request.
 * @wr_pend_priv:	Out value returns pointer serving as handler context.
 *
 * Return: 0 on success, or -errno on error.
 */
int smc_wr_tx_get_free_slot(struct smc_link *link,
			    smc_wr_tx_handler handler,
			    struct smc_wr_buf **wr_buf,
			    struct smc_rdma_wr **wr_rdma_buf,
			    struct smc_wr_tx_pend_priv **wr_pend_priv)
{
	struct smc_link_group *lgr = smc_get_lgr(link);
	struct smc_wr_tx_pend *wr_pend;
	u32 idx = link->wr_tx_cnt;
	struct ib_send_wr *wr_ib;
	u64 wr_id;
	int rc;

	*wr_buf = NULL;
	*wr_pend_priv = NULL;
	if (in_softirq() || lgr->terminating) {
		rc = smc_wr_tx_get_free_slot_index(link, &idx);
		if (rc)
			return rc;
	} else {
		rc = wait_event_interruptible_timeout(
			link->wr_tx_wait,
			link->state == SMC_LNK_INACTIVE ||
			lgr->terminating ||
			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
		if (!rc) {
			/* timeout - terminate connections */
			smc_lgr_terminate_sched(lgr);
			return -EPIPE;
		}
		if (idx == link->wr_tx_cnt)
			return -EPIPE;
	}
	wr_id = smc_wr_tx_get_next_wr_id(link);
	wr_pend = &link->wr_tx_pends[idx];
	wr_pend->wr_id = wr_id;
	wr_pend->handler = handler;
	wr_pend->link = link;
	wr_pend->idx = idx;
	wr_ib = &link->wr_tx_ibs[idx];
	wr_ib->wr_id = wr_id;
	*wr_buf = &link->wr_tx_bufs[idx];
	if (wr_rdma_buf)
		*wr_rdma_buf = &link->wr_tx_rdmas[idx];
	*wr_pend_priv = &wr_pend->priv;
	return 0;
}
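
/* Typical caller flow (illustrative sketch only; "my_tx_handler" stands in for
 * a caller-provided smc_wr_tx_handler):
 *
 *	rc = smc_wr_tx_get_free_slot(link, my_tx_handler, &wr_buf, NULL, &pend);
 *	if (rc)
 *		return rc;
 *	... build the LLC/CDC message in *wr_buf ...
 *	rc = smc_wr_tx_send(link, pend);
 *
 * A caller that decides not to send must give the slot back with
 * smc_wr_tx_put_slot(link, pend).
 */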

int smc_wr_tx_put_slot(struct smc_link *link,
		       struct smc_wr_tx_pend_priv *wr_pend_priv)
{
	struct smc_wr_tx_pend *pend;

	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
	if (pend->idx < link->wr_tx_cnt) {
		u32 idx = pend->idx;

		/* clear the full struct smc_wr_tx_pend including .priv */
		memset(&link->wr_tx_pends[idx], 0,
		       sizeof(link->wr_tx_pends[idx]));
		memset(&link->wr_tx_bufs[idx], 0,
		       sizeof(link->wr_tx_bufs[idx]));
		test_and_clear_bit(idx, link->wr_tx_mask);
		wake_up(&link->wr_tx_wait);
		return 1;
	}

	return 0;
}

/* Send prepared WR slot via ib_post_send.
 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
 */
int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
{
	struct smc_wr_tx_pend *pend;
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	pend = container_of(priv, struct smc_wr_tx_pend, priv);
	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
	if (rc) {
		smc_wr_tx_put_slot(link, priv);
		smc_lgr_terminate_sched(smc_get_lgr(link));
	}
	return rc;
}
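
/* Note: a failing ib_post_send() is treated as fatal for the whole link group;
 * the slot is released again and smc_lgr_terminate_sched() tears the
 * connections down asynchronously.
 */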

/* Register a memory region and wait for result. */
int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
{
	int rc;

	ib_req_notify_cq(link->smcibdev->roce_cq_send,
			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	link->wr_reg_state = POSTED;
	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
	link->wr_reg.mr = mr;
	link->wr_reg.key = mr->rkey;
	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
	if (rc)
		return rc;

	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
					      (link->wr_reg_state != POSTED),
					      SMC_WR_REG_MR_WAIT_TIME);
	if (!rc) {
		/* timeout - terminate connections */
		smc_lgr_terminate_sched(smc_get_lgr(link));
		return -EPIPE;
	}
	if (rc == -ERESTARTSYS)
		return -EINTR;
	switch (link->wr_reg_state) {
	case CONFIRMED:
		rc = 0;
		break;
	case FAILED:
		rc = -EIO;
		break;
	case POSTED:
		rc = -EPIPE;
		break;
	}
	return rc;
}
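
/* The registration handshake uses link->wr_reg_state: it is set to POSTED
 * here, and smc_wr_tx_process_cqe() moves it to CONFIRMED or FAILED once the
 * IB_WC_REG_MR completion arrives and wakes up wr_reg_wait.
 */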

void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
			     smc_wr_tx_filter filter,
			     smc_wr_tx_dismisser dismisser,
			     unsigned long data)
{
	struct smc_wr_tx_pend_priv *tx_pend;
	struct smc_wr_rx_hdr *wr_tx;
	int i;

	for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
		wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
		if (wr_tx->type != wr_tx_hdr_type)
			continue;
		tx_pend = &link->wr_tx_pends[i].priv;
		if (filter(tx_pend, data))
			dismisser(tx_pend);
	}
}

/****************************** receive queue ********************************/

int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
{
	struct smc_wr_rx_handler *h_iter;
	int rc = 0;

	spin_lock(&smc_wr_rx_hash_lock);
	hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
		if (h_iter->type == handler->type) {
			rc = -EEXIST;
			goto out_unlock;
		}
	}
	hash_add(smc_wr_rx_hash, &handler->list, handler->type);
out_unlock:
	spin_unlock(&smc_wr_rx_hash_lock);
	return rc;
}
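
/* Illustrative registration sketch (field names as declared in smc_wr.h; the
 * identifiers below are placeholders, not taken from this file):
 *
 *	static struct smc_wr_rx_handler my_rx_handler = {
 *		.handler	= my_rx_fn,
 *		.type		= MY_MSG_TYPE,
 *	};
 *	rc = smc_wr_rx_register_handler(&my_rx_handler);
 */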

/* Demultiplex a received work request based on the message type to its handler.
 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs,
 * and not being modified any more afterwards so we don't need to lock it.
 */
static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_wr_rx_handler *handler;
	struct smc_wr_rx_hdr *wr_rx;
	u64 temp_wr_id;
	u32 index;

	if (wc->byte_len < sizeof(*wr_rx))
		return; /* short message */
	temp_wr_id = wc->wr_id;
	index = do_div(temp_wr_id, link->wr_rx_cnt);
	wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
	hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
		if (handler->type == wr_rx->type)
			handler->handler(wc, wr_rx);
	}
}
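
/* wc->wr_id of a receive completion carries the counter value the WR was
 * posted with (see smc_wr_rx_post()); do_div() reduces it modulo wr_rx_cnt to
 * recover the index of the matching wr_rx_bufs[] element.
 */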

static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
{
	struct smc_link *link;
	int i;

	for (i = 0; i < num; i++) {
		link = wc[i].qp->qp_context;
		if (wc[i].status == IB_WC_SUCCESS) {
			link->wr_rx_tstamp = jiffies;
			smc_wr_rx_demultiplex(&wc[i]);
			smc_wr_rx_post(link); /* refill WR RX */
		} else {
			/* handle status errors */
			switch (wc[i].status) {
			case IB_WC_RETRY_EXC_ERR:
			case IB_WC_RNR_RETRY_EXC_ERR:
			case IB_WC_WR_FLUSH_ERR:
				/* terminate connections of this link group
				 * abnormally
				 */
				smc_lgr_terminate_sched(smc_get_lgr(link));
				break;
			default:
				smc_wr_rx_post(link); /* refill WR RX */
				break;
			}
		}
	}
}

static void smc_wr_rx_tasklet_fn(unsigned long data)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)data;
	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
	int polled = 0;
	int rc;

again:
	polled++;
	do {
		memset(&wc, 0, sizeof(wc));
		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
		if (polled == 1) {
			ib_req_notify_cq(dev->roce_cq_recv,
					 IB_CQ_SOLICITED_MASK
					 | IB_CQ_REPORT_MISSED_EVENTS);
		}
		if (!rc)
			break;
		smc_wr_rx_process_cqes(&wc[0], rc);
	} while (rc > 0);
	if (polled == 1)
		goto again;
}

void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
{
	struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;

	tasklet_schedule(&dev->recv_tasklet);
}

int smc_wr_rx_post_init(struct smc_link *link)
{
	u32 i;
	int rc = 0;

	for (i = 0; i < link->wr_rx_cnt; i++)
		rc = smc_wr_rx_post(link);
	return rc;
}

/***************************** init, exit, misc ******************************/

void smc_wr_remember_qp_attr(struct smc_link *lnk)
{
	struct ib_qp_attr *attr = &lnk->qp_attr;
	struct ib_qp_init_attr init_attr;

	memset(attr, 0, sizeof(*attr));
	memset(&init_attr, 0, sizeof(init_attr));
	ib_query_qp(lnk->roce_qp, attr,
		    IB_QP_STATE |
		    IB_QP_CUR_STATE |
		    IB_QP_PKEY_INDEX |
		    IB_QP_PORT |
		    IB_QP_QKEY |
		    IB_QP_AV |
		    IB_QP_PATH_MTU |
		    IB_QP_TIMEOUT |
		    IB_QP_RETRY_CNT |
		    IB_QP_RNR_RETRY |
		    IB_QP_RQ_PSN |
		    IB_QP_MAX_QP_RD_ATOMIC |
		    IB_QP_ALT_PATH |
		    IB_QP_MIN_RNR_TIMER |
		    IB_QP_SQ_PSN |
		    IB_QP_PATH_MIG_STATE |
		    IB_QP_CAP |
		    IB_QP_DEST_QPN,
		    &init_attr);

	lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
			       lnk->qp_attr.cap.max_send_wr);
	lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
			       lnk->qp_attr.cap.max_recv_wr);
}
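
/* The usable WR counts are the smaller of the compile-time ring size and what
 * the queue pair actually supports; receive gets three times as many slots as
 * send, matching the 3x sizing of the rx buffer and SGE arrays below.
 */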

static void smc_wr_init_sge(struct smc_link *lnk)
{
	u32 i;

	for (i = 0; i < lnk->wr_tx_cnt; i++) {
		lnk->wr_tx_sges[i].addr =
			lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
		lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
			lnk->roce_pd->local_dma_lkey;
		lnk->wr_tx_ibs[i].next = NULL;
		lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
		lnk->wr_tx_ibs[i].num_sge = 1;
		lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
		lnk->wr_tx_ibs[i].send_flags =
			IB_SEND_SIGNALED | IB_SEND_SOLICITED;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
		lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
			lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
	}
	for (i = 0; i < lnk->wr_rx_cnt; i++) {
		lnk->wr_rx_sges[i].addr =
			lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
		lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
		lnk->wr_rx_ibs[i].next = NULL;
		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
		lnk->wr_rx_ibs[i].num_sge = 1;
	}
	lnk->wr_reg.wr.next = NULL;
	lnk->wr_reg.wr.num_sge = 0;
	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}
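
/* All send and receive buffers live in two contiguous, DMA-mapped areas
 * (wr_tx_bufs and wr_rx_bufs); slot i therefore starts at the mapped base
 * address plus i * SMC_WR_BUF_SIZE, which is what the SGE .addr fields above
 * encode.
 */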

void smc_wr_free_link(struct smc_link *lnk)
{
	struct ib_device *ibdev;

	if (smc_wr_tx_wait_no_pending_sends(lnk))
		memset(lnk->wr_tx_mask, 0,
		       BITS_TO_LONGS(SMC_WR_BUF_CNT) *
						sizeof(*lnk->wr_tx_mask));

	if (!lnk->smcibdev)
		return;
	ibdev = lnk->smcibdev->ibdev;

	if (lnk->wr_rx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
				    DMA_FROM_DEVICE);
		lnk->wr_rx_dma_addr = 0;
	}
	if (lnk->wr_tx_dma_addr) {
		ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
				    SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
				    DMA_TO_DEVICE);
		lnk->wr_tx_dma_addr = 0;
	}
}

void smc_wr_free_link_mem(struct smc_link *lnk)
{
	kfree(lnk->wr_tx_pends);
	lnk->wr_tx_pends = NULL;
	kfree(lnk->wr_tx_mask);
	lnk->wr_tx_mask = NULL;
	kfree(lnk->wr_tx_sges);
	lnk->wr_tx_sges = NULL;
	kfree(lnk->wr_tx_rdma_sges);
	lnk->wr_tx_rdma_sges = NULL;
	kfree(lnk->wr_rx_sges);
	lnk->wr_rx_sges = NULL;
	kfree(lnk->wr_tx_rdmas);
	lnk->wr_tx_rdmas = NULL;
	kfree(lnk->wr_rx_ibs);
	lnk->wr_rx_ibs = NULL;
	kfree(lnk->wr_tx_ibs);
	lnk->wr_tx_ibs = NULL;
	kfree(lnk->wr_tx_bufs);
	lnk->wr_tx_bufs = NULL;
	kfree(lnk->wr_rx_bufs);
	lnk->wr_rx_bufs = NULL;
}

int smc_wr_alloc_link_mem(struct smc_link *link)
{
	/* allocate link related memory */
	link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
	if (!link->wr_tx_bufs)
		goto no_mem;
	link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
				   GFP_KERNEL);
	if (!link->wr_rx_bufs)
		goto no_mem_wr_tx_bufs;
	link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_tx_ibs)
		goto no_mem_wr_rx_bufs;
	link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
				  sizeof(link->wr_rx_ibs[0]),
				  GFP_KERNEL);
	if (!link->wr_rx_ibs)
		goto no_mem_wr_tx_ibs;
	link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_rdmas[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_rdmas)
		goto no_mem_wr_rx_ibs;
	link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
					sizeof(link->wr_tx_rdma_sges[0]),
					GFP_KERNEL);
	if (!link->wr_tx_rdma_sges)
		goto no_mem_wr_tx_rdmas;
	link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_tx_sges)
		goto no_mem_wr_tx_rdma_sges;
	link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
				   sizeof(link->wr_rx_sges[0]),
				   GFP_KERNEL);
	if (!link->wr_rx_sges)
		goto no_mem_wr_tx_sges;
	link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT),
				   sizeof(*link->wr_tx_mask),
				   GFP_KERNEL);
	if (!link->wr_tx_mask)
		goto no_mem_wr_rx_sges;
	link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
				    sizeof(link->wr_tx_pends[0]),
				    GFP_KERNEL);
	if (!link->wr_tx_pends)
		goto no_mem_wr_tx_mask;
	return 0;

no_mem_wr_tx_mask:
	kfree(link->wr_tx_mask);
no_mem_wr_rx_sges:
	kfree(link->wr_rx_sges);
no_mem_wr_tx_sges:
	kfree(link->wr_tx_sges);
no_mem_wr_tx_rdma_sges:
	kfree(link->wr_tx_rdma_sges);
no_mem_wr_tx_rdmas:
	kfree(link->wr_tx_rdmas);
no_mem_wr_rx_ibs:
	kfree(link->wr_rx_ibs);
no_mem_wr_tx_ibs:
	kfree(link->wr_tx_ibs);
no_mem_wr_rx_bufs:
	kfree(link->wr_rx_bufs);
no_mem_wr_tx_bufs:
	kfree(link->wr_tx_bufs);
no_mem:
	return -ENOMEM;
}

void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
{
	tasklet_kill(&smcibdev->recv_tasklet);
	tasklet_kill(&smcibdev->send_tasklet);
}

void smc_wr_add_dev(struct smc_ib_device *smcibdev)
{
	tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn,
		     (unsigned long)smcibdev);
	tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn,
		     (unsigned long)smcibdev);
}

int smc_wr_create_link(struct smc_link *lnk)
{
	struct ib_device *ibdev = lnk->smcibdev->ibdev;
	int rc = 0;

	smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
	lnk->wr_rx_id = 0;
	lnk->wr_rx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
		DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
		lnk->wr_rx_dma_addr = 0;
		rc = -EIO;
		goto out;
	}
	lnk->wr_tx_dma_addr = ib_dma_map_single(
		ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
		DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
		rc = -EIO;
		goto dma_unmap;
	}
	smc_wr_init_sge(lnk);
	memset(lnk->wr_tx_mask, 0,
	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
	init_waitqueue_head(&lnk->wr_tx_wait);
	init_waitqueue_head(&lnk->wr_reg_wait);
	return rc;

dma_unmap:
	ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
			    SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
			    DMA_FROM_DEVICE);
	lnk->wr_rx_dma_addr = 0;
out:
	return rc;
}