1 // SPDX-License-Identifier: GPL-2.0-or-later
5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
11 #define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
13 #include <linux/module.h>
17 #include <rdma/ib_cm.h>
18 #include <rdma/ib_verbs.h>
19 #include "rtrs-srv-trace.h"
21 MODULE_DESCRIPTION("RDMA Transport Server");
22 MODULE_LICENSE("GPL");
24 /* Must be power of 2, see mask from mr->page_size in ib_sg_to_pages() */
25 #define DEFAULT_MAX_CHUNK_SIZE (128 << 10)
26 #define DEFAULT_SESS_QUEUE_DEPTH 512
27 #define MAX_HDR_SIZE PAGE_SIZE
29 static const struct rtrs_rdma_dev_pd_ops dev_pd_ops
;
30 static struct rtrs_rdma_dev_pd dev_pd
= {
33 const struct class rtrs_dev_class
= {
34 .name
= "rtrs-server",
36 static struct rtrs_srv_ib_ctx ib_ctx
;
38 static int __read_mostly max_chunk_size
= DEFAULT_MAX_CHUNK_SIZE
;
39 static int __read_mostly sess_queue_depth
= DEFAULT_SESS_QUEUE_DEPTH
;
41 static bool always_invalidate
= true;
42 module_param(always_invalidate
, bool, 0444);
43 MODULE_PARM_DESC(always_invalidate
,
44 "Invalidate memory registration for contiguous memory regions before accessing.");
46 module_param_named(max_chunk_size
, max_chunk_size
, int, 0444);
47 MODULE_PARM_DESC(max_chunk_size
,
48 "Max size for each IO request, when change the unit is in byte (default: "
49 __stringify(DEFAULT_MAX_CHUNK_SIZE
) "KB)");
51 module_param_named(sess_queue_depth
, sess_queue_depth
, int, 0444);
52 MODULE_PARM_DESC(sess_queue_depth
,
53 "Number of buffers for pending I/O requests to allocate per session. Maximum: "
54 __stringify(MAX_SESS_QUEUE_DEPTH
) " (default: "
55 __stringify(DEFAULT_SESS_QUEUE_DEPTH
) ")");
57 static cpumask_t cq_affinity_mask
= { CPU_BITS_ALL
};
59 static struct workqueue_struct
*rtrs_wq
;
61 static inline struct rtrs_srv_con
*to_srv_con(struct rtrs_con
*c
)
63 return container_of(c
, struct rtrs_srv_con
, c
);
66 static bool rtrs_srv_change_state(struct rtrs_srv_path
*srv_path
,
67 enum rtrs_srv_state new_state
)
69 enum rtrs_srv_state old_state
;
73 spin_lock_irqsave(&srv_path
->state_lock
, flags
);
74 old_state
= srv_path
->state
;
76 case RTRS_SRV_CONNECTED
:
77 if (old_state
== RTRS_SRV_CONNECTING
)
80 case RTRS_SRV_CLOSING
:
81 if (old_state
== RTRS_SRV_CONNECTING
||
82 old_state
== RTRS_SRV_CONNECTED
)
86 if (old_state
== RTRS_SRV_CLOSING
)
93 srv_path
->state
= new_state
;
94 spin_unlock_irqrestore(&srv_path
->state_lock
, flags
);
99 static void free_id(struct rtrs_srv_op
*id
)
106 static void rtrs_srv_free_ops_ids(struct rtrs_srv_path
*srv_path
)
108 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
111 if (srv_path
->ops_ids
) {
112 for (i
= 0; i
< srv
->queue_depth
; i
++)
113 free_id(srv_path
->ops_ids
[i
]);
114 kfree(srv_path
->ops_ids
);
115 srv_path
->ops_ids
= NULL
;
119 static void rtrs_srv_rdma_done(struct ib_cq
*cq
, struct ib_wc
*wc
);
121 static struct ib_cqe io_comp_cqe
= {
122 .done
= rtrs_srv_rdma_done
125 static inline void rtrs_srv_inflight_ref_release(struct percpu_ref
*ref
)
127 struct rtrs_srv_path
*srv_path
= container_of(ref
,
128 struct rtrs_srv_path
,
131 percpu_ref_exit(&srv_path
->ids_inflight_ref
);
132 complete(&srv_path
->complete_done
);
135 static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_path
*srv_path
)
137 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
138 struct rtrs_srv_op
*id
;
141 srv_path
->ops_ids
= kcalloc(srv
->queue_depth
,
142 sizeof(*srv_path
->ops_ids
),
144 if (!srv_path
->ops_ids
)
147 for (i
= 0; i
< srv
->queue_depth
; ++i
) {
148 id
= kzalloc(sizeof(*id
), GFP_KERNEL
);
152 srv_path
->ops_ids
[i
] = id
;
155 ret
= percpu_ref_init(&srv_path
->ids_inflight_ref
,
156 rtrs_srv_inflight_ref_release
, 0, GFP_KERNEL
);
158 pr_err("Percpu reference init failed\n");
161 init_completion(&srv_path
->complete_done
);
166 rtrs_srv_free_ops_ids(srv_path
);
170 static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_path
*srv_path
)
172 percpu_ref_get(&srv_path
->ids_inflight_ref
);
175 static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_path
*srv_path
)
177 percpu_ref_put(&srv_path
->ids_inflight_ref
);
180 static void rtrs_srv_reg_mr_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
182 struct rtrs_srv_con
*con
= to_srv_con(wc
->qp
->qp_context
);
183 struct rtrs_path
*s
= con
->c
.path
;
184 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
186 if (wc
->status
!= IB_WC_SUCCESS
) {
187 rtrs_err(s
, "REG MR failed: %s\n",
188 ib_wc_status_msg(wc
->status
));
189 close_path(srv_path
);
194 static struct ib_cqe local_reg_cqe
= {
195 .done
= rtrs_srv_reg_mr_done
198 static int rdma_write_sg(struct rtrs_srv_op
*id
)
200 struct rtrs_path
*s
= id
->con
->c
.path
;
201 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
202 dma_addr_t dma_addr
= srv_path
->dma_addr
[id
->msg_id
];
203 struct rtrs_srv_mr
*srv_mr
;
204 struct ib_send_wr inv_wr
;
205 struct ib_rdma_wr imm_wr
;
206 struct ib_rdma_wr
*wr
= NULL
;
207 enum ib_send_flags flags
;
212 struct ib_reg_wr rwr
;
213 struct ib_sge
*plist
;
216 sg_cnt
= le16_to_cpu(id
->rd_msg
->sg_cnt
);
217 need_inval
= le16_to_cpu(id
->rd_msg
->flags
) & RTRS_MSG_NEED_INVAL_F
;
225 plist
->addr
= dma_addr
+ offset
;
226 plist
->length
= le32_to_cpu(id
->rd_msg
->desc
[0].len
);
228 /* WR will fail with length error
231 if (plist
->length
== 0) {
232 rtrs_err(s
, "Invalid RDMA-Write sg list length 0\n");
236 plist
->lkey
= srv_path
->s
.dev
->ib_pd
->local_dma_lkey
;
237 offset
+= plist
->length
;
239 wr
->wr
.sg_list
= plist
;
241 wr
->remote_addr
= le64_to_cpu(id
->rd_msg
->desc
[0].addr
);
242 wr
->rkey
= le32_to_cpu(id
->rd_msg
->desc
[0].key
);
246 /* Only one key is actually used */
247 WARN_ON_ONCE(rkey
!= wr
->rkey
);
249 wr
->wr
.opcode
= IB_WR_RDMA_WRITE
;
250 wr
->wr
.wr_cqe
= &io_comp_cqe
;
251 wr
->wr
.ex
.imm_data
= 0;
252 wr
->wr
.send_flags
= 0;
254 if (need_inval
&& always_invalidate
) {
255 wr
->wr
.next
= &rwr
.wr
;
256 rwr
.wr
.next
= &inv_wr
;
257 inv_wr
.next
= &imm_wr
.wr
;
258 } else if (always_invalidate
) {
259 wr
->wr
.next
= &rwr
.wr
;
260 rwr
.wr
.next
= &imm_wr
.wr
;
261 } else if (need_inval
) {
262 wr
->wr
.next
= &inv_wr
;
263 inv_wr
.next
= &imm_wr
.wr
;
265 wr
->wr
.next
= &imm_wr
.wr
;
268 * From time to time we have to post signaled sends,
269 * or send queue will fill up and only QP reset can help.
271 flags
= (atomic_inc_return(&id
->con
->c
.wr_cnt
) % s
->signal_interval
) ?
272 0 : IB_SEND_SIGNALED
;
275 inv_wr
.sg_list
= NULL
;
277 inv_wr
.opcode
= IB_WR_SEND_WITH_INV
;
278 inv_wr
.wr_cqe
= &io_comp_cqe
;
279 inv_wr
.send_flags
= 0;
280 inv_wr
.ex
.invalidate_rkey
= rkey
;
283 imm_wr
.wr
.next
= NULL
;
284 if (always_invalidate
) {
285 struct rtrs_msg_rkey_rsp
*msg
;
287 srv_mr
= &srv_path
->mrs
[id
->msg_id
];
288 rwr
.wr
.opcode
= IB_WR_REG_MR
;
289 rwr
.wr
.wr_cqe
= &local_reg_cqe
;
292 rwr
.wr
.send_flags
= 0;
293 rwr
.key
= srv_mr
->mr
->rkey
;
294 rwr
.access
= (IB_ACCESS_LOCAL_WRITE
|
295 IB_ACCESS_REMOTE_WRITE
);
296 msg
= srv_mr
->iu
->buf
;
297 msg
->buf_id
= cpu_to_le16(id
->msg_id
);
298 msg
->type
= cpu_to_le16(RTRS_MSG_RKEY_RSP
);
299 msg
->rkey
= cpu_to_le32(srv_mr
->mr
->rkey
);
301 list
.addr
= srv_mr
->iu
->dma_addr
;
302 list
.length
= sizeof(*msg
);
303 list
.lkey
= srv_path
->s
.dev
->ib_pd
->local_dma_lkey
;
304 imm_wr
.wr
.sg_list
= &list
;
305 imm_wr
.wr
.num_sge
= 1;
306 imm_wr
.wr
.opcode
= IB_WR_SEND_WITH_IMM
;
307 ib_dma_sync_single_for_device(srv_path
->s
.dev
->ib_dev
,
308 srv_mr
->iu
->dma_addr
,
309 srv_mr
->iu
->size
, DMA_TO_DEVICE
);
311 imm_wr
.wr
.sg_list
= NULL
;
312 imm_wr
.wr
.num_sge
= 0;
313 imm_wr
.wr
.opcode
= IB_WR_RDMA_WRITE_WITH_IMM
;
315 imm_wr
.wr
.send_flags
= flags
;
316 imm_wr
.wr
.ex
.imm_data
= cpu_to_be32(rtrs_to_io_rsp_imm(id
->msg_id
,
319 imm_wr
.wr
.wr_cqe
= &io_comp_cqe
;
320 ib_dma_sync_single_for_device(srv_path
->s
.dev
->ib_dev
, dma_addr
,
321 offset
, DMA_BIDIRECTIONAL
);
323 err
= ib_post_send(id
->con
->c
.qp
, &id
->tx_wr
.wr
, NULL
);
326 "Posting RDMA-Write-Request to QP failed, err: %d\n",
333 * send_io_resp_imm() - respond to client with empty IMM on failed READ/WRITE
334 * requests or on successful WRITE request.
335 * @con: the connection to send back result
336 * @id: the id associated with the IO
337 * @errno: the error number of the IO.
339 * Return 0 on success, errno otherwise.
341 static int send_io_resp_imm(struct rtrs_srv_con
*con
, struct rtrs_srv_op
*id
,
344 struct rtrs_path
*s
= con
->c
.path
;
345 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
346 struct ib_send_wr inv_wr
, *wr
= NULL
;
347 struct ib_rdma_wr imm_wr
;
348 struct ib_reg_wr rwr
;
349 struct rtrs_srv_mr
*srv_mr
;
350 bool need_inval
= false;
351 enum ib_send_flags flags
;
355 if (id
->dir
== READ
) {
356 struct rtrs_msg_rdma_read
*rd_msg
= id
->rd_msg
;
359 need_inval
= le16_to_cpu(rd_msg
->flags
) &
360 RTRS_MSG_NEED_INVAL_F
;
361 sg_cnt
= le16_to_cpu(rd_msg
->sg_cnt
);
365 inv_wr
.wr_cqe
= &io_comp_cqe
;
366 inv_wr
.sg_list
= NULL
;
368 inv_wr
.opcode
= IB_WR_SEND_WITH_INV
;
369 inv_wr
.send_flags
= 0;
370 /* Only one key is actually used */
371 inv_wr
.ex
.invalidate_rkey
=
372 le32_to_cpu(rd_msg
->desc
[0].key
);
380 trace_send_io_resp_imm(id
, need_inval
, always_invalidate
, errno
);
382 if (need_inval
&& always_invalidate
) {
384 inv_wr
.next
= &rwr
.wr
;
385 rwr
.wr
.next
= &imm_wr
.wr
;
386 } else if (always_invalidate
) {
388 rwr
.wr
.next
= &imm_wr
.wr
;
389 } else if (need_inval
) {
391 inv_wr
.next
= &imm_wr
.wr
;
396 * From time to time we have to post signalled sends,
397 * or send queue will fill up and only QP reset can help.
399 flags
= (atomic_inc_return(&con
->c
.wr_cnt
) % s
->signal_interval
) ?
400 0 : IB_SEND_SIGNALED
;
401 imm
= rtrs_to_io_rsp_imm(id
->msg_id
, errno
, need_inval
);
402 imm_wr
.wr
.next
= NULL
;
403 if (always_invalidate
) {
405 struct rtrs_msg_rkey_rsp
*msg
;
407 srv_mr
= &srv_path
->mrs
[id
->msg_id
];
408 rwr
.wr
.next
= &imm_wr
.wr
;
409 rwr
.wr
.opcode
= IB_WR_REG_MR
;
410 rwr
.wr
.wr_cqe
= &local_reg_cqe
;
412 rwr
.wr
.send_flags
= 0;
414 rwr
.key
= srv_mr
->mr
->rkey
;
415 rwr
.access
= (IB_ACCESS_LOCAL_WRITE
|
416 IB_ACCESS_REMOTE_WRITE
);
417 msg
= srv_mr
->iu
->buf
;
418 msg
->buf_id
= cpu_to_le16(id
->msg_id
);
419 msg
->type
= cpu_to_le16(RTRS_MSG_RKEY_RSP
);
420 msg
->rkey
= cpu_to_le32(srv_mr
->mr
->rkey
);
422 list
.addr
= srv_mr
->iu
->dma_addr
;
423 list
.length
= sizeof(*msg
);
424 list
.lkey
= srv_path
->s
.dev
->ib_pd
->local_dma_lkey
;
425 imm_wr
.wr
.sg_list
= &list
;
426 imm_wr
.wr
.num_sge
= 1;
427 imm_wr
.wr
.opcode
= IB_WR_SEND_WITH_IMM
;
428 ib_dma_sync_single_for_device(srv_path
->s
.dev
->ib_dev
,
429 srv_mr
->iu
->dma_addr
,
430 srv_mr
->iu
->size
, DMA_TO_DEVICE
);
432 imm_wr
.wr
.sg_list
= NULL
;
433 imm_wr
.wr
.num_sge
= 0;
434 imm_wr
.wr
.opcode
= IB_WR_RDMA_WRITE_WITH_IMM
;
436 imm_wr
.wr
.send_flags
= flags
;
437 imm_wr
.wr
.wr_cqe
= &io_comp_cqe
;
439 imm_wr
.wr
.ex
.imm_data
= cpu_to_be32(imm
);
441 err
= ib_post_send(id
->con
->c
.qp
, wr
, NULL
);
443 rtrs_err_rl(s
, "Posting RDMA-Reply to QP failed, err: %d\n",
449 void close_path(struct rtrs_srv_path
*srv_path
)
451 if (rtrs_srv_change_state(srv_path
, RTRS_SRV_CLOSING
))
452 queue_work(rtrs_wq
, &srv_path
->close_work
);
453 WARN_ON(srv_path
->state
!= RTRS_SRV_CLOSING
);
/*
 * Return the symbolic name of a server path state as a string literal.
 * The four states listed below map to their own enumerator names.
 * NOTE(review): handling of any other value is not visible in this listing.
 */
456 static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state
)
459 case RTRS_SRV_CONNECTING
:
460 return "RTRS_SRV_CONNECTING";
461 case RTRS_SRV_CONNECTED
:
462 return "RTRS_SRV_CONNECTED";
463 case RTRS_SRV_CLOSING
:
464 return "RTRS_SRV_CLOSING";
465 case RTRS_SRV_CLOSED
:
466 return "RTRS_SRV_CLOSED";
473 * rtrs_srv_resp_rdma() - Finish an RDMA request
475 * @id: Internal RTRS operation identifier
476 * @status: Response Code sent to the other side for this operation.
477 * 0 = success, <0 error
480 * Finish a RDMA operation. A message is sent to the client and the
481 * corresponding memory areas will be released.
483 bool rtrs_srv_resp_rdma(struct rtrs_srv_op
*id
, int status
)
485 struct rtrs_srv_path
*srv_path
;
486 struct rtrs_srv_con
*con
;
495 srv_path
= to_srv_path(s
);
499 if (srv_path
->state
!= RTRS_SRV_CONNECTED
) {
501 "Sending I/O response failed, server path %s is disconnected, path state %s\n",
502 kobject_name(&srv_path
->kobj
),
503 rtrs_srv_state_str(srv_path
->state
));
506 if (always_invalidate
) {
507 struct rtrs_srv_mr
*mr
= &srv_path
->mrs
[id
->msg_id
];
509 ib_update_fast_reg_key(mr
->mr
, ib_inc_rkey(mr
->mr
->rkey
));
511 if (atomic_sub_return(1, &con
->c
.sq_wr_avail
) < 0) {
512 rtrs_err(s
, "IB send queue full: srv_path=%s cid=%d\n",
513 kobject_name(&srv_path
->kobj
),
515 atomic_add(1, &con
->c
.sq_wr_avail
);
516 spin_lock(&con
->rsp_wr_wait_lock
);
517 list_add_tail(&id
->wait_list
, &con
->rsp_wr_wait_list
);
518 spin_unlock(&con
->rsp_wr_wait_lock
);
522 if (status
|| id
->dir
== WRITE
|| !id
->rd_msg
->sg_cnt
)
523 err
= send_io_resp_imm(con
, id
, status
);
525 err
= rdma_write_sg(id
);
528 rtrs_err_rl(s
, "IO response failed: %d: srv_path=%s\n", err
,
529 kobject_name(&srv_path
->kobj
));
530 close_path(srv_path
);
533 rtrs_srv_put_ops_ids(srv_path
);
536 EXPORT_SYMBOL(rtrs_srv_resp_rdma
);
539 * rtrs_srv_set_sess_priv() - Set private pointer in rtrs_srv.
540 * @srv: Session pointer
541 * @priv: The private pointer that is associated with the session.
543 void rtrs_srv_set_sess_priv(struct rtrs_srv_sess
*srv
, void *priv
)
547 EXPORT_SYMBOL(rtrs_srv_set_sess_priv
);
549 static void unmap_cont_bufs(struct rtrs_srv_path
*srv_path
)
553 for (i
= 0; i
< srv_path
->mrs_num
; i
++) {
554 struct rtrs_srv_mr
*srv_mr
;
556 srv_mr
= &srv_path
->mrs
[i
];
558 if (always_invalidate
)
559 rtrs_iu_free(srv_mr
->iu
, srv_path
->s
.dev
->ib_dev
, 1);
561 ib_dereg_mr(srv_mr
->mr
);
562 ib_dma_unmap_sg(srv_path
->s
.dev
->ib_dev
, srv_mr
->sgt
.sgl
,
563 srv_mr
->sgt
.nents
, DMA_BIDIRECTIONAL
);
564 sg_free_table(&srv_mr
->sgt
);
566 kfree(srv_path
->mrs
);
569 static int map_cont_bufs(struct rtrs_srv_path
*srv_path
)
571 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
572 struct rtrs_path
*ss
= &srv_path
->s
;
574 unsigned int chunk_bits
;
575 int chunks_per_mr
= 1;
577 struct sg_table
*sgt
;
580 * Here we map queue_depth chunks to MR. Firstly we have to
581 * figure out how many chunks can we map per MR.
583 if (always_invalidate
) {
585 * in order to do invalidate for each chunks of memory, we needs
586 * more memory regions.
588 mrs_num
= srv
->queue_depth
;
591 srv_path
->s
.dev
->ib_dev
->attrs
.max_fast_reg_page_list_len
;
592 mrs_num
= DIV_ROUND_UP(srv
->queue_depth
, chunks_per_mr
);
593 chunks_per_mr
= DIV_ROUND_UP(srv
->queue_depth
, mrs_num
);
596 srv_path
->mrs
= kcalloc(mrs_num
, sizeof(*srv_path
->mrs
), GFP_KERNEL
);
600 for (srv_path
->mrs_num
= 0; srv_path
->mrs_num
< mrs_num
;
601 srv_path
->mrs_num
++) {
602 struct rtrs_srv_mr
*srv_mr
= &srv_path
->mrs
[srv_path
->mrs_num
];
603 struct scatterlist
*s
;
604 int nr
, nr_sgt
, chunks
;
607 chunks
= chunks_per_mr
* srv_path
->mrs_num
;
608 if (!always_invalidate
)
609 chunks_per_mr
= min_t(int, chunks_per_mr
,
610 srv
->queue_depth
- chunks
);
612 err
= sg_alloc_table(sgt
, chunks_per_mr
, GFP_KERNEL
);
616 for_each_sg(sgt
->sgl
, s
, chunks_per_mr
, i
)
617 sg_set_page(s
, srv
->chunks
[chunks
+ i
],
620 nr_sgt
= ib_dma_map_sg(srv_path
->s
.dev
->ib_dev
, sgt
->sgl
,
621 sgt
->nents
, DMA_BIDIRECTIONAL
);
626 mr
= ib_alloc_mr(srv_path
->s
.dev
->ib_pd
, IB_MR_TYPE_MEM_REG
,
632 nr
= ib_map_mr_sg(mr
, sgt
->sgl
, nr_sgt
,
633 NULL
, max_chunk_size
);
635 err
= nr
< 0 ? nr
: -EINVAL
;
639 if (always_invalidate
) {
640 srv_mr
->iu
= rtrs_iu_alloc(1,
641 sizeof(struct rtrs_msg_rkey_rsp
),
642 GFP_KERNEL
, srv_path
->s
.dev
->ib_dev
,
643 DMA_TO_DEVICE
, rtrs_srv_rdma_done
);
646 rtrs_err(ss
, "rtrs_iu_alloc(), err: %d\n", err
);
650 /* Eventually dma addr for each chunk can be cached */
651 for_each_sg(sgt
->sgl
, s
, nr_sgt
, i
)
652 srv_path
->dma_addr
[chunks
+ i
] = sg_dma_address(s
);
654 ib_update_fast_reg_key(mr
, ib_inc_rkey(mr
->rkey
));
658 chunk_bits
= ilog2(srv
->queue_depth
- 1) + 1;
659 srv_path
->mem_bits
= (MAX_IMM_PAYL_BITS
- chunk_bits
);
666 ib_dma_unmap_sg(srv_path
->s
.dev
->ib_dev
, sgt
->sgl
,
667 sgt
->nents
, DMA_BIDIRECTIONAL
);
671 unmap_cont_bufs(srv_path
);
676 static void rtrs_srv_hb_err_handler(struct rtrs_con
*c
)
678 struct rtrs_srv_con
*con
= container_of(c
, typeof(*con
), c
);
679 struct rtrs_srv_path
*srv_path
= to_srv_path(con
->c
.path
);
681 rtrs_err(con
->c
.path
, "HB err handler for path=%s\n", kobject_name(&srv_path
->kobj
));
682 close_path(to_srv_path(c
->path
));
/*
 * Initialise heartbeat handling for the path via rtrs_init_hb(), passing
 * the path's generic part, io_comp_cqe and rtrs_srv_hb_err_handler.
 * NOTE(review): the remaining arguments of the call are not visible in
 * this listing.
 */
685 static void rtrs_srv_init_hb(struct rtrs_srv_path
*srv_path
)
687 rtrs_init_hb(&srv_path
->s
, &io_comp_cqe
,
690 rtrs_srv_hb_err_handler
,
694 static void rtrs_srv_start_hb(struct rtrs_srv_path
*srv_path
)
696 rtrs_start_hb(&srv_path
->s
);
699 static void rtrs_srv_stop_hb(struct rtrs_srv_path
*srv_path
)
701 rtrs_stop_hb(&srv_path
->s
);
704 static void rtrs_srv_info_rsp_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
706 struct rtrs_srv_con
*con
= to_srv_con(wc
->qp
->qp_context
);
707 struct rtrs_path
*s
= con
->c
.path
;
708 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
711 iu
= container_of(wc
->wr_cqe
, struct rtrs_iu
, cqe
);
712 rtrs_iu_free(iu
, srv_path
->s
.dev
->ib_dev
, 1);
714 if (wc
->status
!= IB_WC_SUCCESS
) {
715 rtrs_err(s
, "Sess info response send failed: %s\n",
716 ib_wc_status_msg(wc
->status
));
717 close_path(srv_path
);
720 WARN_ON(wc
->opcode
!= IB_WC_SEND
);
/*
 * Account a path coming up: increments srv->paths_up under paths_ev_mutex,
 * invokes the link_ev callback with RTRS_SRV_LINK_EV_CONNECTED and marks
 * the path as established.
 * NOTE(review): the conditions guarding the callback and the established
 * flag, and the return statement, are not visible in this listing.
 */
723 static int rtrs_srv_path_up(struct rtrs_srv_path
*srv_path
)
725 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
726 struct rtrs_srv_ctx
*ctx
= srv
->ctx
;
/* paths_up and the link event are updated under paths_ev_mutex. */
729 mutex_lock(&srv
->paths_ev_mutex
);
730 up
= ++srv
->paths_up
;
732 ret
= ctx
->ops
.link_ev(srv
, RTRS_SRV_LINK_EV_CONNECTED
, NULL
);
733 mutex_unlock(&srv
->paths_ev_mutex
);
735 /* Mark session as established */
737 srv_path
->established
= true;
742 static void rtrs_srv_path_down(struct rtrs_srv_path
*srv_path
)
744 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
745 struct rtrs_srv_ctx
*ctx
= srv
->ctx
;
747 if (!srv_path
->established
)
750 srv_path
->established
= false;
751 mutex_lock(&srv
->paths_ev_mutex
);
752 WARN_ON(!srv
->paths_up
);
753 if (--srv
->paths_up
== 0)
754 ctx
->ops
.link_ev(srv
, RTRS_SRV_LINK_EV_DISCONNECTED
, srv
->priv
);
755 mutex_unlock(&srv
->paths_ev_mutex
);
758 static bool exist_pathname(struct rtrs_srv_ctx
*ctx
,
759 const char *pathname
, const uuid_t
*path_uuid
)
761 struct rtrs_srv_sess
*srv
;
762 struct rtrs_srv_path
*srv_path
;
765 mutex_lock(&ctx
->srv_mutex
);
766 list_for_each_entry(srv
, &ctx
->srv_list
, ctx_list
) {
767 mutex_lock(&srv
->paths_mutex
);
769 /* when a client with same uuid and same sessname tried to add a path */
770 if (uuid_equal(&srv
->paths_uuid
, path_uuid
)) {
771 mutex_unlock(&srv
->paths_mutex
);
775 list_for_each_entry(srv_path
, &srv
->paths_list
, s
.entry
) {
776 if (strlen(srv_path
->s
.sessname
) == strlen(pathname
) &&
777 !strcmp(srv_path
->s
.sessname
, pathname
)) {
782 mutex_unlock(&srv
->paths_mutex
);
786 mutex_unlock(&ctx
->srv_mutex
);
790 static int post_recv_path(struct rtrs_srv_path
*srv_path
);
791 static int rtrs_rdma_do_reject(struct rdma_cm_id
*cm_id
, int errno
);
793 static int process_info_req(struct rtrs_srv_con
*con
,
794 struct rtrs_msg_info_req
*msg
)
796 struct rtrs_path
*s
= con
->c
.path
;
797 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
798 struct ib_send_wr
*reg_wr
= NULL
;
799 struct rtrs_msg_info_rsp
*rsp
;
800 struct rtrs_iu
*tx_iu
;
801 struct ib_reg_wr
*rwr
;
805 err
= post_recv_path(srv_path
);
807 rtrs_err(s
, "post_recv_path(), err: %d\n", err
);
811 if (strchr(msg
->pathname
, '/') || strchr(msg
->pathname
, '.')) {
812 rtrs_err(s
, "pathname cannot contain / and .\n");
816 if (exist_pathname(srv_path
->srv
->ctx
,
817 msg
->pathname
, &srv_path
->srv
->paths_uuid
)) {
818 rtrs_err(s
, "pathname is duplicated: %s\n", msg
->pathname
);
821 strscpy(srv_path
->s
.sessname
, msg
->pathname
,
822 sizeof(srv_path
->s
.sessname
));
824 rwr
= kcalloc(srv_path
->mrs_num
, sizeof(*rwr
), GFP_KERNEL
);
828 tx_sz
= sizeof(*rsp
);
829 tx_sz
+= sizeof(rsp
->desc
[0]) * srv_path
->mrs_num
;
830 tx_iu
= rtrs_iu_alloc(1, tx_sz
, GFP_KERNEL
, srv_path
->s
.dev
->ib_dev
,
831 DMA_TO_DEVICE
, rtrs_srv_info_rsp_done
);
838 rsp
->type
= cpu_to_le16(RTRS_MSG_INFO_RSP
);
839 rsp
->sg_cnt
= cpu_to_le16(srv_path
->mrs_num
);
841 for (mri
= 0; mri
< srv_path
->mrs_num
; mri
++) {
842 struct ib_mr
*mr
= srv_path
->mrs
[mri
].mr
;
844 rsp
->desc
[mri
].addr
= cpu_to_le64(mr
->iova
);
845 rsp
->desc
[mri
].key
= cpu_to_le32(mr
->rkey
);
846 rsp
->desc
[mri
].len
= cpu_to_le32(mr
->length
);
849 * Fill in reg MR request and chain them *backwards*
851 rwr
[mri
].wr
.next
= mri
? &rwr
[mri
- 1].wr
: NULL
;
852 rwr
[mri
].wr
.opcode
= IB_WR_REG_MR
;
853 rwr
[mri
].wr
.wr_cqe
= &local_reg_cqe
;
854 rwr
[mri
].wr
.num_sge
= 0;
855 rwr
[mri
].wr
.send_flags
= 0;
857 rwr
[mri
].key
= mr
->rkey
;
858 rwr
[mri
].access
= (IB_ACCESS_LOCAL_WRITE
|
859 IB_ACCESS_REMOTE_WRITE
);
860 reg_wr
= &rwr
[mri
].wr
;
863 err
= rtrs_srv_create_path_files(srv_path
);
866 kobject_get(&srv_path
->kobj
);
867 get_device(&srv_path
->srv
->dev
);
868 err
= rtrs_srv_change_state(srv_path
, RTRS_SRV_CONNECTED
);
870 rtrs_err(s
, "rtrs_srv_change_state(), err: %d\n", err
);
874 rtrs_srv_start_hb(srv_path
);
877 * We do not account number of established connections at the current
878 * moment, we rely on the client, which should send info request when
879 * all connections are successfully established. Thus, simply notify
880 * listener with a proper event if we are the first path.
882 err
= rtrs_srv_path_up(srv_path
);
884 rtrs_err(s
, "rtrs_srv_path_up(), err: %d\n", err
);
888 ib_dma_sync_single_for_device(srv_path
->s
.dev
->ib_dev
,
890 tx_iu
->size
, DMA_TO_DEVICE
);
892 /* Send info response */
893 err
= rtrs_iu_post_send(&con
->c
, tx_iu
, tx_sz
, reg_wr
);
895 rtrs_err(s
, "rtrs_iu_post_send(), err: %d\n", err
);
897 rtrs_iu_free(tx_iu
, srv_path
->s
.dev
->ib_dev
, 1);
905 static void rtrs_srv_info_req_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
907 struct rtrs_srv_con
*con
= to_srv_con(wc
->qp
->qp_context
);
908 struct rtrs_path
*s
= con
->c
.path
;
909 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
910 struct rtrs_msg_info_req
*msg
;
916 iu
= container_of(wc
->wr_cqe
, struct rtrs_iu
, cqe
);
917 if (wc
->status
!= IB_WC_SUCCESS
) {
918 rtrs_err(s
, "Sess info request receive failed: %s\n",
919 ib_wc_status_msg(wc
->status
));
922 WARN_ON(wc
->opcode
!= IB_WC_RECV
);
924 if (wc
->byte_len
< sizeof(*msg
)) {
925 rtrs_err(s
, "Sess info request is malformed: size %d\n",
929 ib_dma_sync_single_for_cpu(srv_path
->s
.dev
->ib_dev
, iu
->dma_addr
,
930 iu
->size
, DMA_FROM_DEVICE
);
932 if (le16_to_cpu(msg
->type
) != RTRS_MSG_INFO_REQ
) {
933 rtrs_err(s
, "Sess info request is malformed: type %d\n",
934 le16_to_cpu(msg
->type
));
937 err
= process_info_req(con
, msg
);
941 rtrs_iu_free(iu
, srv_path
->s
.dev
->ib_dev
, 1);
944 rtrs_iu_free(iu
, srv_path
->s
.dev
->ib_dev
, 1);
945 close_path(srv_path
);
/*
 * Allocate a receive IU sized for struct rtrs_msg_info_req, with
 * rtrs_srv_info_req_done as its completion handler, and post it on @con.
 * The IU is freed again on the path that logs the post failure.
 * NOTE(review): the error checks and return statements are not visible in
 * this listing.
 */
948 static int post_recv_info_req(struct rtrs_srv_con
*con
)
950 struct rtrs_path
*s
= con
->c
.path
;
951 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
952 struct rtrs_iu
*rx_iu
;
955 rx_iu
= rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req
),
956 GFP_KERNEL
, srv_path
->s
.dev
->ib_dev
,
957 DMA_FROM_DEVICE
, rtrs_srv_info_req_done
);
960 /* Prepare for receiving the info request */
961 err
= rtrs_iu_post_recv(&con
->c
, rx_iu
);
963 rtrs_err(s
, "rtrs_iu_post_recv(), err: %d\n", err
);
964 rtrs_iu_free(rx_iu
, srv_path
->s
.dev
->ib_dev
, 1);
/*
 * Post @q_size empty receives on @con, each completing through
 * io_comp_cqe.
 * NOTE(review): error handling and the return value are not visible in
 * this listing.
 */
971 static int post_recv_io(struct rtrs_srv_con
*con
, size_t q_size
)
975 for (i
= 0; i
< q_size
; i
++) {
976 err
= rtrs_post_recv_empty(&con
->c
, &io_comp_cqe
);
984 static int post_recv_path(struct rtrs_srv_path
*srv_path
)
986 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
987 struct rtrs_path
*s
= &srv_path
->s
;
991 for (cid
= 0; cid
< srv_path
->s
.con_num
; cid
++) {
993 q_size
= SERVICE_CON_QUEUE_DEPTH
;
995 q_size
= srv
->queue_depth
;
996 if (srv_path
->state
!= RTRS_SRV_CONNECTING
) {
997 rtrs_err(s
, "Path state invalid. state %s\n",
998 rtrs_srv_state_str(srv_path
->state
));
1002 if (!srv_path
->s
.con
[cid
]) {
1003 rtrs_err(s
, "Conn not set for %d\n", cid
);
1007 err
= post_recv_io(to_srv_con(srv_path
->s
.con
[cid
]), q_size
);
1009 rtrs_err(s
, "post_recv_io(), err: %d\n", err
);
1017 static void process_read(struct rtrs_srv_con
*con
,
1018 struct rtrs_msg_rdma_read
*msg
,
1019 u32 buf_id
, u32 off
)
1021 struct rtrs_path
*s
= con
->c
.path
;
1022 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
1023 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
1024 struct rtrs_srv_ctx
*ctx
= srv
->ctx
;
1025 struct rtrs_srv_op
*id
;
1027 size_t usr_len
, data_len
;
1031 if (srv_path
->state
!= RTRS_SRV_CONNECTED
) {
1033 "Processing read request failed, session is disconnected, sess state %s\n",
1034 rtrs_srv_state_str(srv_path
->state
));
1037 if (msg
->sg_cnt
!= 1 && msg
->sg_cnt
!= 0) {
1039 "Processing read request failed, invalid message\n");
1042 rtrs_srv_get_ops_ids(srv_path
);
1043 rtrs_srv_update_rdma_stats(srv_path
->stats
, off
, READ
);
1044 id
= srv_path
->ops_ids
[buf_id
];
1047 id
->msg_id
= buf_id
;
1049 usr_len
= le16_to_cpu(msg
->usr_len
);
1050 data_len
= off
- usr_len
;
1051 data
= page_address(srv
->chunks
[buf_id
]);
1052 ret
= ctx
->ops
.rdma_ev(srv
->priv
, id
, data
, data_len
,
1053 data
+ data_len
, usr_len
);
1057 "Processing read request failed, user module cb reported for msg_id %d, err: %d\n",
1065 ret
= send_io_resp_imm(con
, id
, ret
);
1068 "Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n",
1070 close_path(srv_path
);
1072 rtrs_srv_put_ops_ids(srv_path
);
1075 static void process_write(struct rtrs_srv_con
*con
,
1076 struct rtrs_msg_rdma_write
*req
,
1077 u32 buf_id
, u32 off
)
1079 struct rtrs_path
*s
= con
->c
.path
;
1080 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
1081 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
1082 struct rtrs_srv_ctx
*ctx
= srv
->ctx
;
1083 struct rtrs_srv_op
*id
;
1085 size_t data_len
, usr_len
;
1089 if (srv_path
->state
!= RTRS_SRV_CONNECTED
) {
1091 "Processing write request failed, session is disconnected, sess state %s\n",
1092 rtrs_srv_state_str(srv_path
->state
));
1095 rtrs_srv_get_ops_ids(srv_path
);
1096 rtrs_srv_update_rdma_stats(srv_path
->stats
, off
, WRITE
);
1097 id
= srv_path
->ops_ids
[buf_id
];
1100 id
->msg_id
= buf_id
;
1102 usr_len
= le16_to_cpu(req
->usr_len
);
1103 data_len
= off
- usr_len
;
1104 data
= page_address(srv
->chunks
[buf_id
]);
1105 ret
= ctx
->ops
.rdma_ev(srv
->priv
, id
, data
, data_len
,
1106 data
+ data_len
, usr_len
);
1109 "Processing write request failed, user module callback reports err: %d\n",
1117 ret
= send_io_resp_imm(con
, id
, ret
);
1120 "Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n",
1122 close_path(srv_path
);
1124 rtrs_srv_put_ops_ids(srv_path
);
1127 static void process_io_req(struct rtrs_srv_con
*con
, void *msg
,
1130 struct rtrs_path
*s
= con
->c
.path
;
1131 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
1132 struct rtrs_msg_rdma_hdr
*hdr
;
1135 ib_dma_sync_single_for_cpu(srv_path
->s
.dev
->ib_dev
,
1136 srv_path
->dma_addr
[id
],
1137 max_chunk_size
, DMA_BIDIRECTIONAL
);
1139 type
= le16_to_cpu(hdr
->type
);
1142 case RTRS_MSG_WRITE
:
1143 process_write(con
, msg
, id
, off
);
1146 process_read(con
, msg
, id
, off
);
1150 "Processing I/O request failed, unknown message type received: 0x%02x\n",
1158 close_path(srv_path
);
1161 static void rtrs_srv_inv_rkey_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
1163 struct rtrs_srv_mr
*mr
=
1164 container_of(wc
->wr_cqe
, typeof(*mr
), inv_cqe
);
1165 struct rtrs_srv_con
*con
= to_srv_con(wc
->qp
->qp_context
);
1166 struct rtrs_path
*s
= con
->c
.path
;
1167 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
1168 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
1172 if (wc
->status
!= IB_WC_SUCCESS
) {
1173 rtrs_err(s
, "Failed IB_WR_LOCAL_INV: %s\n",
1174 ib_wc_status_msg(wc
->status
));
1175 close_path(srv_path
);
1177 msg_id
= mr
->msg_id
;
1179 data
= page_address(srv
->chunks
[msg_id
]) + off
;
1180 process_io_req(con
, data
, msg_id
, off
);
1183 static int rtrs_srv_inv_rkey(struct rtrs_srv_con
*con
,
1184 struct rtrs_srv_mr
*mr
)
1186 struct ib_send_wr wr
= {
1187 .opcode
= IB_WR_LOCAL_INV
,
1188 .wr_cqe
= &mr
->inv_cqe
,
1189 .send_flags
= IB_SEND_SIGNALED
,
1190 .ex
.invalidate_rkey
= mr
->mr
->rkey
,
1192 mr
->inv_cqe
.done
= rtrs_srv_inv_rkey_done
;
1194 return ib_post_send(con
->c
.qp
, &wr
, NULL
);
/*
 * Drain the connection's list of responses that were waiting to be sent:
 * each entry is removed from the head of rsp_wr_wait_list and handed to
 * rtrs_srv_resp_rdma() with its stored status.
 * NOTE(review): the condition under which an entry is put back at the head
 * of the list (and the loop exit) is not visible in this listing.
 */
1197 static void rtrs_rdma_process_wr_wait_list(struct rtrs_srv_con
*con
)
1199 spin_lock(&con
->rsp_wr_wait_lock
);
1200 while (!list_empty(&con
->rsp_wr_wait_list
)) {
1201 struct rtrs_srv_op
*id
;
1204 id
= list_entry(con
->rsp_wr_wait_list
.next
,
1205 struct rtrs_srv_op
, wait_list
);
1206 list_del(&id
->wait_list
);
/* The list lock is dropped around the call to rtrs_srv_resp_rdma(). */
1208 spin_unlock(&con
->rsp_wr_wait_lock
);
1209 ret
= rtrs_srv_resp_rdma(id
, id
->status
);
1210 spin_lock(&con
->rsp_wr_wait_lock
);
1213 list_add(&id
->wait_list
, &con
->rsp_wr_wait_list
);
1217 spin_unlock(&con
->rsp_wr_wait_lock
);
1220 static void rtrs_srv_rdma_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
1222 struct rtrs_srv_con
*con
= to_srv_con(wc
->qp
->qp_context
);
1223 struct rtrs_path
*s
= con
->c
.path
;
1224 struct rtrs_srv_path
*srv_path
= to_srv_path(s
);
1225 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
1226 u32 imm_type
, imm_payload
;
1229 if (wc
->status
!= IB_WC_SUCCESS
) {
1230 if (wc
->status
!= IB_WC_WR_FLUSH_ERR
) {
1232 "%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n",
1233 ib_wc_status_msg(wc
->status
), wc
->wr_cqe
,
1234 wc
->opcode
, wc
->vendor_err
, wc
->byte_len
);
1235 close_path(srv_path
);
1240 switch (wc
->opcode
) {
1241 case IB_WC_RECV_RDMA_WITH_IMM
:
1243 * post_recv() RDMA write completions of IO reqs (read/write)
1246 if (WARN_ON(wc
->wr_cqe
!= &io_comp_cqe
))
1248 srv_path
->s
.hb_missed_cnt
= 0;
1249 err
= rtrs_post_recv_empty(&con
->c
, &io_comp_cqe
);
1251 rtrs_err(s
, "rtrs_post_recv(), err: %d\n", err
);
1252 close_path(srv_path
);
1255 rtrs_from_imm(be32_to_cpu(wc
->ex
.imm_data
),
1256 &imm_type
, &imm_payload
);
1257 if (imm_type
== RTRS_IO_REQ_IMM
) {
1261 msg_id
= imm_payload
>> srv_path
->mem_bits
;
1262 off
= imm_payload
& ((1 << srv_path
->mem_bits
) - 1);
1263 if (msg_id
>= srv
->queue_depth
|| off
>= max_chunk_size
) {
1264 rtrs_err(s
, "Wrong msg_id %u, off %u\n",
1266 close_path(srv_path
);
1269 if (always_invalidate
) {
1270 struct rtrs_srv_mr
*mr
= &srv_path
->mrs
[msg_id
];
1273 mr
->msg_id
= msg_id
;
1274 err
= rtrs_srv_inv_rkey(con
, mr
);
1276 rtrs_err(s
, "rtrs_post_recv(), err: %d\n",
1278 close_path(srv_path
);
1282 data
= page_address(srv
->chunks
[msg_id
]) + off
;
1283 process_io_req(con
, data
, msg_id
, off
);
1285 } else if (imm_type
== RTRS_HB_MSG_IMM
) {
1286 WARN_ON(con
->c
.cid
);
1287 rtrs_send_hb_ack(&srv_path
->s
);
1288 } else if (imm_type
== RTRS_HB_ACK_IMM
) {
1289 WARN_ON(con
->c
.cid
);
1290 srv_path
->s
.hb_missed_cnt
= 0;
1292 rtrs_wrn(s
, "Unknown IMM type %u\n", imm_type
);
1295 case IB_WC_RDMA_WRITE
:
1298 * post_send() RDMA write completions of IO reqs (read/write)
1301 atomic_add(s
->signal_interval
, &con
->c
.sq_wr_avail
);
1303 if (!list_empty_careful(&con
->rsp_wr_wait_list
))
1304 rtrs_rdma_process_wr_wait_list(con
);
1308 rtrs_wrn(s
, "Unexpected WC type: %d\n", wc
->opcode
);
1314 * rtrs_srv_get_path_name() - Get the name of a connected rtrs_srv path.
1316 * @pathname: Pathname buffer
1317 * @len: Length of pathname buffer
1319 int rtrs_srv_get_path_name(struct rtrs_srv_sess
*srv
, char *pathname
,
1322 struct rtrs_srv_path
*srv_path
;
1323 int err
= -ENOTCONN
;
1325 mutex_lock(&srv
->paths_mutex
);
1326 list_for_each_entry(srv_path
, &srv
->paths_list
, s
.entry
) {
1327 if (srv_path
->state
!= RTRS_SRV_CONNECTED
)
1329 strscpy(pathname
, srv_path
->s
.sessname
,
1330 min_t(size_t, sizeof(srv_path
->s
.sessname
), len
));
1334 mutex_unlock(&srv
->paths_mutex
);
1338 EXPORT_SYMBOL(rtrs_srv_get_path_name
);
1341 * rtrs_srv_get_queue_depth() - Get rtrs_srv qdepth.
1344 int rtrs_srv_get_queue_depth(struct rtrs_srv_sess
*srv
)
1346 return srv
->queue_depth
;
1348 EXPORT_SYMBOL(rtrs_srv_get_queue_depth
);
1350 static int find_next_bit_ring(struct rtrs_srv_path
*srv_path
)
1352 struct ib_device
*ib_dev
= srv_path
->s
.dev
->ib_dev
;
1355 v
= cpumask_next(srv_path
->cur_cq_vector
, &cq_affinity_mask
);
1356 if (v
>= nr_cpu_ids
|| v
>= ib_dev
->num_comp_vectors
)
1357 v
= cpumask_first(&cq_affinity_mask
);
1361 static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_path
*srv_path
)
1363 srv_path
->cur_cq_vector
= find_next_bit_ring(srv_path
);
1365 return srv_path
->cur_cq_vector
;
1368 static void rtrs_srv_dev_release(struct device
*dev
)
1370 struct rtrs_srv_sess
*srv
= container_of(dev
, struct rtrs_srv_sess
,
1376 static void free_srv(struct rtrs_srv_sess
*srv
)
1380 WARN_ON(refcount_read(&srv
->refcount
));
1381 for (i
= 0; i
< srv
->queue_depth
; i
++)
1382 __free_pages(srv
->chunks
[i
], get_order(max_chunk_size
));
1384 mutex_destroy(&srv
->paths_mutex
);
1385 mutex_destroy(&srv
->paths_ev_mutex
);
1386 /* last put to release the srv structure */
1387 put_device(&srv
->dev
);
1390 static struct rtrs_srv_sess
*get_or_create_srv(struct rtrs_srv_ctx
*ctx
,
1391 const uuid_t
*paths_uuid
,
1394 struct rtrs_srv_sess
*srv
;
1397 mutex_lock(&ctx
->srv_mutex
);
1398 list_for_each_entry(srv
, &ctx
->srv_list
, ctx_list
) {
1399 if (uuid_equal(&srv
->paths_uuid
, paths_uuid
) &&
1400 refcount_inc_not_zero(&srv
->refcount
)) {
1401 mutex_unlock(&ctx
->srv_mutex
);
1405 mutex_unlock(&ctx
->srv_mutex
);
1407 * If this request is not the first connection request from the
1408 * client for this session then fail and return error.
1411 pr_err_ratelimited("Error: Not the first connection request for this session\n");
1412 return ERR_PTR(-ENXIO
);
1415 /* need to allocate a new srv */
1416 srv
= kzalloc(sizeof(*srv
), GFP_KERNEL
);
1418 return ERR_PTR(-ENOMEM
);
1420 INIT_LIST_HEAD(&srv
->paths_list
);
1421 mutex_init(&srv
->paths_mutex
);
1422 mutex_init(&srv
->paths_ev_mutex
);
1423 uuid_copy(&srv
->paths_uuid
, paths_uuid
);
1424 srv
->queue_depth
= sess_queue_depth
;
1426 device_initialize(&srv
->dev
);
1427 srv
->dev
.release
= rtrs_srv_dev_release
;
1429 srv
->chunks
= kcalloc(srv
->queue_depth
, sizeof(*srv
->chunks
),
1434 for (i
= 0; i
< srv
->queue_depth
; i
++) {
1435 srv
->chunks
[i
] = alloc_pages(GFP_KERNEL
,
1436 get_order(max_chunk_size
));
1437 if (!srv
->chunks
[i
])
1438 goto err_free_chunks
;
1440 refcount_set(&srv
->refcount
, 1);
1441 mutex_lock(&ctx
->srv_mutex
);
1442 list_add(&srv
->ctx_list
, &ctx
->srv_list
);
1443 mutex_unlock(&ctx
->srv_mutex
);
1449 __free_pages(srv
->chunks
[i
], get_order(max_chunk_size
));
1454 return ERR_PTR(-ENOMEM
);
1457 static void put_srv(struct rtrs_srv_sess
*srv
)
1459 if (refcount_dec_and_test(&srv
->refcount
)) {
1460 struct rtrs_srv_ctx
*ctx
= srv
->ctx
;
1462 WARN_ON(srv
->dev
.kobj
.state_in_sysfs
);
1464 mutex_lock(&ctx
->srv_mutex
);
1465 list_del(&srv
->ctx_list
);
1466 mutex_unlock(&ctx
->srv_mutex
);
1471 static void __add_path_to_srv(struct rtrs_srv_sess
*srv
,
1472 struct rtrs_srv_path
*srv_path
)
1474 list_add_tail(&srv_path
->s
.entry
, &srv
->paths_list
);
1476 WARN_ON(srv
->paths_num
>= MAX_PATHS_NUM
);
1479 static void del_path_from_srv(struct rtrs_srv_path
*srv_path
)
1481 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
1486 mutex_lock(&srv
->paths_mutex
);
1487 list_del(&srv_path
->s
.entry
);
1488 WARN_ON(!srv
->paths_num
);
1490 mutex_unlock(&srv
->paths_mutex
);
1493 /* return true if addresses are the same, error other wise */
1494 static int sockaddr_cmp(const struct sockaddr
*a
, const struct sockaddr
*b
)
1496 switch (a
->sa_family
) {
1498 return memcmp(&((struct sockaddr_ib
*)a
)->sib_addr
,
1499 &((struct sockaddr_ib
*)b
)->sib_addr
,
1500 sizeof(struct ib_addr
)) &&
1501 (b
->sa_family
== AF_IB
);
1503 return memcmp(&((struct sockaddr_in
*)a
)->sin_addr
,
1504 &((struct sockaddr_in
*)b
)->sin_addr
,
1505 sizeof(struct in_addr
)) &&
1506 (b
->sa_family
== AF_INET
);
1508 return memcmp(&((struct sockaddr_in6
*)a
)->sin6_addr
,
1509 &((struct sockaddr_in6
*)b
)->sin6_addr
,
1510 sizeof(struct in6_addr
)) &&
1511 (b
->sa_family
== AF_INET6
);
1517 static bool __is_path_w_addr_exists(struct rtrs_srv_sess
*srv
,
1518 struct rdma_addr
*addr
)
1520 struct rtrs_srv_path
*srv_path
;
1522 list_for_each_entry(srv_path
, &srv
->paths_list
, s
.entry
)
1523 if (!sockaddr_cmp((struct sockaddr
*)&srv_path
->s
.dst_addr
,
1524 (struct sockaddr
*)&addr
->dst_addr
) &&
1525 !sockaddr_cmp((struct sockaddr
*)&srv_path
->s
.src_addr
,
1526 (struct sockaddr
*)&addr
->src_addr
))
1532 static void free_path(struct rtrs_srv_path
*srv_path
)
1534 if (srv_path
->kobj
.state_in_sysfs
) {
1535 kobject_del(&srv_path
->kobj
);
1536 kobject_put(&srv_path
->kobj
);
1538 free_percpu(srv_path
->stats
->rdma_stats
);
1539 kfree(srv_path
->stats
);
1544 static void rtrs_srv_close_work(struct work_struct
*work
)
1546 struct rtrs_srv_path
*srv_path
;
1547 struct rtrs_srv_con
*con
;
1550 srv_path
= container_of(work
, typeof(*srv_path
), close_work
);
1552 rtrs_srv_stop_hb(srv_path
);
1554 for (i
= 0; i
< srv_path
->s
.con_num
; i
++) {
1555 if (!srv_path
->s
.con
[i
])
1557 con
= to_srv_con(srv_path
->s
.con
[i
]);
1558 rdma_disconnect(con
->c
.cm_id
);
1559 ib_drain_qp(con
->c
.qp
);
1563 * Degrade ref count to the usual model with a single shared
1566 percpu_ref_kill(&srv_path
->ids_inflight_ref
);
1568 /* Wait for all completion */
1569 wait_for_completion(&srv_path
->complete_done
);
1571 rtrs_srv_destroy_path_files(srv_path
);
1573 /* Notify upper layer if we are the last path */
1574 rtrs_srv_path_down(srv_path
);
1576 unmap_cont_bufs(srv_path
);
1577 rtrs_srv_free_ops_ids(srv_path
);
1579 for (i
= 0; i
< srv_path
->s
.con_num
; i
++) {
1580 if (!srv_path
->s
.con
[i
])
1582 con
= to_srv_con(srv_path
->s
.con
[i
]);
1583 rtrs_cq_qp_destroy(&con
->c
);
1584 rdma_destroy_id(con
->c
.cm_id
);
1587 rtrs_ib_dev_put(srv_path
->s
.dev
);
1589 del_path_from_srv(srv_path
);
1590 put_srv(srv_path
->srv
);
1591 srv_path
->srv
= NULL
;
1592 rtrs_srv_change_state(srv_path
, RTRS_SRV_CLOSED
);
1594 kfree(srv_path
->dma_addr
);
1595 kfree(srv_path
->s
.con
);
1596 free_path(srv_path
);
1599 static int rtrs_rdma_do_accept(struct rtrs_srv_path
*srv_path
,
1600 struct rdma_cm_id
*cm_id
)
1602 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
1603 struct rtrs_msg_conn_rsp msg
;
1604 struct rdma_conn_param param
;
1607 param
= (struct rdma_conn_param
) {
1608 .rnr_retry_count
= 7,
1609 .private_data
= &msg
,
1610 .private_data_len
= sizeof(msg
),
1613 msg
= (struct rtrs_msg_conn_rsp
) {
1614 .magic
= cpu_to_le16(RTRS_MAGIC
),
1615 .version
= cpu_to_le16(RTRS_PROTO_VER
),
1616 .queue_depth
= cpu_to_le16(srv
->queue_depth
),
1617 .max_io_size
= cpu_to_le32(max_chunk_size
- MAX_HDR_SIZE
),
1618 .max_hdr_size
= cpu_to_le32(MAX_HDR_SIZE
),
1621 if (always_invalidate
)
1622 msg
.flags
= cpu_to_le32(RTRS_MSG_NEW_RKEY_F
);
1624 err
= rdma_accept(cm_id
, ¶m
);
1626 pr_err("rdma_accept(), err: %d\n", err
);
1631 static int rtrs_rdma_do_reject(struct rdma_cm_id
*cm_id
, int errno
)
1633 struct rtrs_msg_conn_rsp msg
;
1636 msg
= (struct rtrs_msg_conn_rsp
) {
1637 .magic
= cpu_to_le16(RTRS_MAGIC
),
1638 .version
= cpu_to_le16(RTRS_PROTO_VER
),
1639 .errno
= cpu_to_le16(errno
),
1642 err
= rdma_reject(cm_id
, &msg
, sizeof(msg
), IB_CM_REJ_CONSUMER_DEFINED
);
1644 pr_err("rdma_reject(), err: %d\n", err
);
1646 /* Bounce errno back */
1650 static struct rtrs_srv_path
*
1651 __find_path(struct rtrs_srv_sess
*srv
, const uuid_t
*sess_uuid
)
1653 struct rtrs_srv_path
*srv_path
;
1655 list_for_each_entry(srv_path
, &srv
->paths_list
, s
.entry
) {
1656 if (uuid_equal(&srv_path
->s
.uuid
, sess_uuid
))
1663 static int create_con(struct rtrs_srv_path
*srv_path
,
1664 struct rdma_cm_id
*cm_id
,
1667 struct rtrs_srv_sess
*srv
= srv_path
->srv
;
1668 struct rtrs_path
*s
= &srv_path
->s
;
1669 struct rtrs_srv_con
*con
;
1671 u32 cq_num
, max_send_wr
, max_recv_wr
, wr_limit
;
1674 con
= kzalloc(sizeof(*con
), GFP_KERNEL
);
1680 spin_lock_init(&con
->rsp_wr_wait_lock
);
1681 INIT_LIST_HEAD(&con
->rsp_wr_wait_list
);
1682 con
->c
.cm_id
= cm_id
;
1683 con
->c
.path
= &srv_path
->s
;
1685 atomic_set(&con
->c
.wr_cnt
, 1);
1686 wr_limit
= srv_path
->s
.dev
->ib_dev
->attrs
.max_qp_wr
;
1688 if (con
->c
.cid
== 0) {
1690 * All receive and all send (each requiring invalidate)
1691 * + 2 for drain and heartbeat
1693 max_send_wr
= min_t(int, wr_limit
,
1694 SERVICE_CON_QUEUE_DEPTH
* 2 + 2);
1695 max_recv_wr
= max_send_wr
;
1696 s
->signal_interval
= min_not_zero(srv
->queue_depth
,
1697 (size_t)SERVICE_CON_QUEUE_DEPTH
);
1699 /* when always_invlaidate enalbed, we need linv+rinv+mr+imm */
1700 if (always_invalidate
)
1702 min_t(int, wr_limit
,
1703 srv
->queue_depth
* (1 + 4) + 1);
1706 min_t(int, wr_limit
,
1707 srv
->queue_depth
* (1 + 2) + 1);
1709 max_recv_wr
= srv
->queue_depth
+ 1;
1711 cq_num
= max_send_wr
+ max_recv_wr
;
1712 atomic_set(&con
->c
.sq_wr_avail
, max_send_wr
);
1713 cq_vector
= rtrs_srv_get_next_cq_vector(srv_path
);
1715 /* TODO: SOFTIRQ can be faster, but be careful with softirq context */
1716 err
= rtrs_cq_qp_create(&srv_path
->s
, &con
->c
, 1, cq_vector
, cq_num
,
1717 max_send_wr
, max_recv_wr
,
1720 rtrs_err(s
, "rtrs_cq_qp_create(), err: %d\n", err
);
1723 if (con
->c
.cid
== 0) {
1724 err
= post_recv_info_req(con
);
1728 WARN_ON(srv_path
->s
.con
[cid
]);
1729 srv_path
->s
.con
[cid
] = &con
->c
;
1732 * Change context from server to current connection. The other
1733 * way is to use cm_id->qp->qp_context, which does not work on OFED.
1735 cm_id
->context
= &con
->c
;
1740 rtrs_cq_qp_destroy(&con
->c
);
1748 static struct rtrs_srv_path
*__alloc_path(struct rtrs_srv_sess
*srv
,
1749 struct rdma_cm_id
*cm_id
,
1750 unsigned int con_num
,
1751 unsigned int recon_cnt
,
1754 struct rtrs_srv_path
*srv_path
;
1757 struct rtrs_addr path
;
1759 if (srv
->paths_num
>= MAX_PATHS_NUM
) {
1763 if (__is_path_w_addr_exists(srv
, &cm_id
->route
.addr
)) {
1765 pr_err("Path with same addr exists\n");
1768 srv_path
= kzalloc(sizeof(*srv_path
), GFP_KERNEL
);
1772 srv_path
->stats
= kzalloc(sizeof(*srv_path
->stats
), GFP_KERNEL
);
1773 if (!srv_path
->stats
)
1776 srv_path
->stats
->rdma_stats
= alloc_percpu(struct rtrs_srv_stats_rdma_stats
);
1777 if (!srv_path
->stats
->rdma_stats
)
1778 goto err_free_stats
;
1780 srv_path
->stats
->srv_path
= srv_path
;
1782 srv_path
->dma_addr
= kcalloc(srv
->queue_depth
,
1783 sizeof(*srv_path
->dma_addr
),
1785 if (!srv_path
->dma_addr
)
1786 goto err_free_percpu
;
1788 srv_path
->s
.con
= kcalloc(con_num
, sizeof(*srv_path
->s
.con
),
1790 if (!srv_path
->s
.con
)
1791 goto err_free_dma_addr
;
1793 srv_path
->state
= RTRS_SRV_CONNECTING
;
1794 srv_path
->srv
= srv
;
1795 srv_path
->cur_cq_vector
= -1;
1796 srv_path
->s
.dst_addr
= cm_id
->route
.addr
.dst_addr
;
1797 srv_path
->s
.src_addr
= cm_id
->route
.addr
.src_addr
;
1799 /* temporary until receiving session-name from client */
1800 path
.src
= &srv_path
->s
.src_addr
;
1801 path
.dst
= &srv_path
->s
.dst_addr
;
1802 rtrs_addr_to_str(&path
, str
, sizeof(str
));
1803 strscpy(srv_path
->s
.sessname
, str
, sizeof(srv_path
->s
.sessname
));
1805 srv_path
->s
.con_num
= con_num
;
1806 srv_path
->s
.irq_con_num
= con_num
;
1807 srv_path
->s
.recon_cnt
= recon_cnt
;
1808 uuid_copy(&srv_path
->s
.uuid
, uuid
);
1809 spin_lock_init(&srv_path
->state_lock
);
1810 INIT_WORK(&srv_path
->close_work
, rtrs_srv_close_work
);
1811 rtrs_srv_init_hb(srv_path
);
1813 srv_path
->s
.dev
= rtrs_ib_dev_find_or_add(cm_id
->device
, &dev_pd
);
1814 if (!srv_path
->s
.dev
) {
1818 err
= map_cont_bufs(srv_path
);
1822 err
= rtrs_srv_alloc_ops_ids(srv_path
);
1824 goto err_unmap_bufs
;
1826 __add_path_to_srv(srv
, srv_path
);
1831 unmap_cont_bufs(srv_path
);
1833 rtrs_ib_dev_put(srv_path
->s
.dev
);
1835 kfree(srv_path
->s
.con
);
1837 kfree(srv_path
->dma_addr
);
1839 free_percpu(srv_path
->stats
->rdma_stats
);
1841 kfree(srv_path
->stats
);
1845 return ERR_PTR(err
);
1848 static int rtrs_rdma_connect(struct rdma_cm_id
*cm_id
,
1849 const struct rtrs_msg_conn_req
*msg
,
1852 struct rtrs_srv_ctx
*ctx
= cm_id
->context
;
1853 struct rtrs_srv_path
*srv_path
;
1854 struct rtrs_srv_sess
*srv
;
1856 u16 version
, con_num
, cid
;
1858 int err
= -ECONNRESET
;
1860 if (len
< sizeof(*msg
)) {
1861 pr_err("Invalid RTRS connection request\n");
1864 if (le16_to_cpu(msg
->magic
) != RTRS_MAGIC
) {
1865 pr_err("Invalid RTRS magic\n");
1868 version
= le16_to_cpu(msg
->version
);
1869 if (version
>> 8 != RTRS_PROTO_VER_MAJOR
) {
1870 pr_err("Unsupported major RTRS version: %d, expected %d\n",
1871 version
>> 8, RTRS_PROTO_VER_MAJOR
);
1874 con_num
= le16_to_cpu(msg
->cid_num
);
1875 if (con_num
> 4096) {
1877 pr_err("Too many connections requested: %d\n", con_num
);
1880 cid
= le16_to_cpu(msg
->cid
);
1881 if (cid
>= con_num
) {
1883 pr_err("Incorrect cid: %d >= %d\n", cid
, con_num
);
1886 recon_cnt
= le16_to_cpu(msg
->recon_cnt
);
1887 srv
= get_or_create_srv(ctx
, &msg
->paths_uuid
, msg
->first_conn
);
1890 pr_err("get_or_create_srv(), error %d\n", err
);
1893 mutex_lock(&srv
->paths_mutex
);
1894 srv_path
= __find_path(srv
, &msg
->sess_uuid
);
1896 struct rtrs_path
*s
= &srv_path
->s
;
1898 /* Session already holds a reference */
1901 if (srv_path
->state
!= RTRS_SRV_CONNECTING
) {
1902 rtrs_err(s
, "Session in wrong state: %s\n",
1903 rtrs_srv_state_str(srv_path
->state
));
1904 mutex_unlock(&srv
->paths_mutex
);
1910 if (con_num
!= s
->con_num
|| cid
>= s
->con_num
) {
1911 rtrs_err(s
, "Incorrect request: %d, %d\n",
1913 mutex_unlock(&srv
->paths_mutex
);
1917 rtrs_err(s
, "Connection already exists: %d\n",
1919 mutex_unlock(&srv
->paths_mutex
);
1923 srv_path
= __alloc_path(srv
, cm_id
, con_num
, recon_cnt
,
1925 if (IS_ERR(srv_path
)) {
1926 mutex_unlock(&srv
->paths_mutex
);
1928 err
= PTR_ERR(srv_path
);
1929 pr_err("RTRS server session allocation failed: %d\n", err
);
1933 err
= create_con(srv_path
, cm_id
, cid
);
1935 rtrs_err((&srv_path
->s
), "create_con(), error %d\n", err
);
1936 rtrs_rdma_do_reject(cm_id
, err
);
1938 * Since session has other connections we follow normal way
1939 * through workqueue, but still return an error to tell cma.c
1940 * to call rdma_destroy_id() for current connection.
1942 goto close_and_return_err
;
1944 err
= rtrs_rdma_do_accept(srv_path
, cm_id
);
1946 rtrs_err((&srv_path
->s
), "rtrs_rdma_do_accept(), error %d\n", err
);
1947 rtrs_rdma_do_reject(cm_id
, err
);
1949 * Since current connection was successfully added to the
1950 * session we follow normal way through workqueue to close the
1951 * session, thus return 0 to tell cma.c we call
1952 * rdma_destroy_id() ourselves.
1955 goto close_and_return_err
;
1957 mutex_unlock(&srv
->paths_mutex
);
1962 return rtrs_rdma_do_reject(cm_id
, err
);
1964 close_and_return_err
:
1965 mutex_unlock(&srv
->paths_mutex
);
1966 close_path(srv_path
);
1971 static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id
*cm_id
,
1972 struct rdma_cm_event
*ev
)
1974 struct rtrs_srv_path
*srv_path
= NULL
;
1975 struct rtrs_path
*s
= NULL
;
1976 struct rtrs_con
*c
= NULL
;
1978 if (ev
->event
== RDMA_CM_EVENT_CONNECT_REQUEST
)
1980 * In case of error cma.c will destroy cm_id,
1981 * see cma_process_remove()
1983 return rtrs_rdma_connect(cm_id
, ev
->param
.conn
.private_data
,
1984 ev
->param
.conn
.private_data_len
);
1988 srv_path
= to_srv_path(s
);
1990 switch (ev
->event
) {
1991 case RDMA_CM_EVENT_ESTABLISHED
:
1994 case RDMA_CM_EVENT_REJECTED
:
1995 case RDMA_CM_EVENT_CONNECT_ERROR
:
1996 case RDMA_CM_EVENT_UNREACHABLE
:
1997 rtrs_err(s
, "CM error (CM event: %s, err: %d)\n",
1998 rdma_event_msg(ev
->event
), ev
->status
);
2000 case RDMA_CM_EVENT_DISCONNECTED
:
2001 case RDMA_CM_EVENT_ADDR_CHANGE
:
2002 case RDMA_CM_EVENT_TIMEWAIT_EXIT
:
2003 case RDMA_CM_EVENT_DEVICE_REMOVAL
:
2004 close_path(srv_path
);
2007 pr_err("Ignoring unexpected CM event %s, err %d\n",
2008 rdma_event_msg(ev
->event
), ev
->status
);
2015 static struct rdma_cm_id
*rtrs_srv_cm_init(struct rtrs_srv_ctx
*ctx
,
2016 struct sockaddr
*addr
,
2017 enum rdma_ucm_port_space ps
)
2019 struct rdma_cm_id
*cm_id
;
2022 cm_id
= rdma_create_id(&init_net
, rtrs_srv_rdma_cm_handler
,
2023 ctx
, ps
, IB_QPT_RC
);
2024 if (IS_ERR(cm_id
)) {
2025 ret
= PTR_ERR(cm_id
);
2026 pr_err("Creating id for RDMA connection failed, err: %d\n",
2030 ret
= rdma_bind_addr(cm_id
, addr
);
2032 pr_err("Binding RDMA address failed, err: %d\n", ret
);
2035 ret
= rdma_listen(cm_id
, 64);
2037 pr_err("Listening on RDMA connection failed, err: %d\n",
2045 rdma_destroy_id(cm_id
);
2048 return ERR_PTR(ret
);
2051 static int rtrs_srv_rdma_init(struct rtrs_srv_ctx
*ctx
, u16 port
)
2053 struct sockaddr_in6 sin
= {
2054 .sin6_family
= AF_INET6
,
2055 .sin6_addr
= IN6ADDR_ANY_INIT
,
2056 .sin6_port
= htons(port
),
2058 struct sockaddr_ib sib
= {
2059 .sib_family
= AF_IB
,
2060 .sib_sid
= cpu_to_be64(RDMA_IB_IP_PS_IB
| port
),
2061 .sib_sid_mask
= cpu_to_be64(0xffffffffffffffffULL
),
2062 .sib_pkey
= cpu_to_be16(0xffff),
2064 struct rdma_cm_id
*cm_ip
, *cm_ib
;
2068 * We accept both IPoIB and IB connections, so we need to keep
2069 * two cm id's, one for each socket type and port space.
2070 * If the cm initialization of one of the id's fails, we abort
2073 cm_ip
= rtrs_srv_cm_init(ctx
, (struct sockaddr
*)&sin
, RDMA_PS_TCP
);
2075 return PTR_ERR(cm_ip
);
2077 cm_ib
= rtrs_srv_cm_init(ctx
, (struct sockaddr
*)&sib
, RDMA_PS_IB
);
2078 if (IS_ERR(cm_ib
)) {
2079 ret
= PTR_ERR(cm_ib
);
2083 ctx
->cm_id_ip
= cm_ip
;
2084 ctx
->cm_id_ib
= cm_ib
;
2089 rdma_destroy_id(cm_ip
);
2094 static struct rtrs_srv_ctx
*alloc_srv_ctx(struct rtrs_srv_ops
*ops
)
2096 struct rtrs_srv_ctx
*ctx
;
2098 ctx
= kzalloc(sizeof(*ctx
), GFP_KERNEL
);
2103 mutex_init(&ctx
->srv_mutex
);
2104 INIT_LIST_HEAD(&ctx
->srv_list
);
2109 static void free_srv_ctx(struct rtrs_srv_ctx
*ctx
)
2111 WARN_ON(!list_empty(&ctx
->srv_list
));
2112 mutex_destroy(&ctx
->srv_mutex
);
2116 static int rtrs_srv_add_one(struct ib_device
*device
)
2118 struct rtrs_srv_ctx
*ctx
;
2121 mutex_lock(&ib_ctx
.ib_dev_mutex
);
2122 if (ib_ctx
.ib_dev_count
)
2126 * Since our CM IDs are NOT bound to any ib device we will create them
2129 ctx
= ib_ctx
.srv_ctx
;
2130 ret
= rtrs_srv_rdma_init(ctx
, ib_ctx
.port
);
2133 * We errored out here.
2134 * According to the ib code, if we encounter an error here then the
2135 * error code is ignored, and no more calls to our ops are made.
2137 pr_err("Failed to initialize RDMA connection");
2143 * Keep a track on the number of ib devices added
2145 ib_ctx
.ib_dev_count
++;
2148 mutex_unlock(&ib_ctx
.ib_dev_mutex
);
2152 static void rtrs_srv_remove_one(struct ib_device
*device
, void *client_data
)
2154 struct rtrs_srv_ctx
*ctx
;
2156 mutex_lock(&ib_ctx
.ib_dev_mutex
);
2157 ib_ctx
.ib_dev_count
--;
2159 if (ib_ctx
.ib_dev_count
)
2163 * Since our CM IDs are NOT bound to any ib device we will remove them
2164 * only once, when the last device is removed
2166 ctx
= ib_ctx
.srv_ctx
;
2167 rdma_destroy_id(ctx
->cm_id_ip
);
2168 rdma_destroy_id(ctx
->cm_id_ib
);
2171 mutex_unlock(&ib_ctx
.ib_dev_mutex
);
2174 static struct ib_client rtrs_srv_client
= {
2175 .name
= "rtrs_server",
2176 .add
= rtrs_srv_add_one
,
2177 .remove
= rtrs_srv_remove_one
2181 * rtrs_srv_open() - open RTRS server context
2182 * @ops: callback functions
2183 * @port: port to listen on
2185 * Creates server context with specified callbacks.
2187 * Return a valid pointer on success otherwise PTR_ERR.
2189 struct rtrs_srv_ctx
*rtrs_srv_open(struct rtrs_srv_ops
*ops
, u16 port
)
2191 struct rtrs_srv_ctx
*ctx
;
2194 ctx
= alloc_srv_ctx(ops
);
2196 return ERR_PTR(-ENOMEM
);
2198 mutex_init(&ib_ctx
.ib_dev_mutex
);
2199 ib_ctx
.srv_ctx
= ctx
;
2202 err
= ib_register_client(&rtrs_srv_client
);
2205 return ERR_PTR(err
);
2210 EXPORT_SYMBOL(rtrs_srv_open
);
2212 static void close_paths(struct rtrs_srv_sess
*srv
)
2214 struct rtrs_srv_path
*srv_path
;
2216 mutex_lock(&srv
->paths_mutex
);
2217 list_for_each_entry(srv_path
, &srv
->paths_list
, s
.entry
)
2218 close_path(srv_path
);
2219 mutex_unlock(&srv
->paths_mutex
);
2222 static void close_ctx(struct rtrs_srv_ctx
*ctx
)
2224 struct rtrs_srv_sess
*srv
;
2226 mutex_lock(&ctx
->srv_mutex
);
2227 list_for_each_entry(srv
, &ctx
->srv_list
, ctx_list
)
2229 mutex_unlock(&ctx
->srv_mutex
);
2230 flush_workqueue(rtrs_wq
);
2234 * rtrs_srv_close() - close RTRS server context
2235 * @ctx: pointer to server context
2237 * Closes RTRS server context with all client sessions.
2239 void rtrs_srv_close(struct rtrs_srv_ctx
*ctx
)
2241 ib_unregister_client(&rtrs_srv_client
);
2242 mutex_destroy(&ib_ctx
.ib_dev_mutex
);
2246 EXPORT_SYMBOL(rtrs_srv_close
);
2248 static int check_module_params(void)
2250 if (sess_queue_depth
< 1 || sess_queue_depth
> MAX_SESS_QUEUE_DEPTH
) {
2251 pr_err("Invalid sess_queue_depth value %d, has to be >= %d, <= %d.\n",
2252 sess_queue_depth
, 1, MAX_SESS_QUEUE_DEPTH
);
2255 if (max_chunk_size
< MIN_CHUNK_SIZE
|| !is_power_of_2(max_chunk_size
)) {
2256 pr_err("Invalid max_chunk_size value %d, has to be >= %d and should be power of two.\n",
2257 max_chunk_size
, MIN_CHUNK_SIZE
);
2262 * Check if IB immediate data size is enough to hold the mem_id and the
2263 * offset inside the memory chunk
2265 if ((ilog2(sess_queue_depth
- 1) + 1) +
2266 (ilog2(max_chunk_size
- 1) + 1) > MAX_IMM_PAYL_BITS
) {
2267 pr_err("RDMA immediate size (%db) not enough to encode %d buffers of size %dB. Reduce 'sess_queue_depth' or 'max_chunk_size' parameters.\n",
2268 MAX_IMM_PAYL_BITS
, sess_queue_depth
, max_chunk_size
);
2275 void rtrs_srv_ib_event_handler(struct ib_event_handler
*handler
,
2276 struct ib_event
*ibevent
)
2278 pr_info("Handling event: %s (%d).\n", ib_event_msg(ibevent
->event
),
2282 static int rtrs_srv_ib_dev_init(struct rtrs_ib_dev
*dev
)
2284 INIT_IB_EVENT_HANDLER(&dev
->event_handler
, dev
->ib_dev
,
2285 rtrs_srv_ib_event_handler
);
2286 ib_register_event_handler(&dev
->event_handler
);
2291 static void rtrs_srv_ib_dev_deinit(struct rtrs_ib_dev
*dev
)
2293 ib_unregister_event_handler(&dev
->event_handler
);
2297 static const struct rtrs_rdma_dev_pd_ops dev_pd_ops
= {
2298 .init
= rtrs_srv_ib_dev_init
,
2299 .deinit
= rtrs_srv_ib_dev_deinit
2303 static int __init
rtrs_server_init(void)
2307 pr_info("Loading module %s, proto %s: (max_chunk_size: %d (pure IO %ld, headers %ld) , sess_queue_depth: %d, always_invalidate: %d)\n",
2308 KBUILD_MODNAME
, RTRS_PROTO_VER_STRING
,
2309 max_chunk_size
, max_chunk_size
- MAX_HDR_SIZE
, MAX_HDR_SIZE
,
2310 sess_queue_depth
, always_invalidate
);
2312 rtrs_rdma_dev_pd_init(0, &dev_pd
);
2314 err
= check_module_params();
2316 pr_err("Failed to load module, invalid module parameters, err: %d\n",
2320 err
= class_register(&rtrs_dev_class
);
2324 rtrs_wq
= alloc_workqueue("rtrs_server_wq", 0, 0);
2333 class_unregister(&rtrs_dev_class
);
2338 static void __exit
rtrs_server_exit(void)
2340 destroy_workqueue(rtrs_wq
);
2341 class_unregister(&rtrs_dev_class
);
2342 rtrs_rdma_dev_pd_deinit(&dev_pd
);
2345 module_init(rtrs_server_init
);
2346 module_exit(rtrs_server_exit
);