// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RDMA Transport Layer
 *
 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
 */
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt

#include <linux/module.h>
#include <linux/inet.h>

#include "rtrs-pri.h"
#include "rtrs-log.h"

MODULE_DESCRIPTION("RDMA Transport Core");
MODULE_LICENSE("GPL");
struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
			      struct ib_device *dma_dev,
			      enum dma_data_direction dir,
			      void (*done)(struct ib_cq *cq, struct ib_wc *wc))
{
	struct rtrs_iu *ius, *iu;
	int i;

	ius = kcalloc(iu_num, sizeof(*ius), gfp_mask);
	if (!ius)
		return NULL;
	for (i = 0; i < iu_num; i++) {
		iu = &ius[i];
		iu->direction = dir;
		iu->buf = kzalloc(size, gfp_mask);
		if (!iu->buf)
			goto err;

		iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
		if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) {
			kfree(iu->buf);
			goto err;
		}

		iu->cqe.done = done;
		iu->size = size;
	}
	return ius;
err:
	rtrs_iu_free(ius, dma_dev, i);
	return NULL;
}
EXPORT_SYMBOL_GPL(rtrs_iu_alloc);
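/*
 * Usage sketch for the iu allocator (hedged: "msg_done", "ibdev" and the
 * sizes below are illustrative placeholders, not defined in this file):
 *
 *	static void msg_done(struct ib_cq *cq, struct ib_wc *wc) { ... }
 *
 *	struct rtrs_iu *ius;
 *
 *	ius = rtrs_iu_alloc(16, 4096, GFP_KERNEL, ibdev,
 *			    DMA_TO_DEVICE, msg_done);
 *	if (!ius)
 *		return -ENOMEM;
 *	...
 *	rtrs_iu_free(ius, ibdev, 16);
 */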
void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_num)
{
	struct rtrs_iu *iu;
	int i;

	if (!ius)
		return;

	for (i = 0; i < queue_num; i++) {
		iu = &ius[i];
		ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size,
				    iu->direction);
		kfree(iu->buf);
	}
	kfree(ius);
}
EXPORT_SYMBOL_GPL(rtrs_iu_free);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu)
{
	struct rtrs_path *path = con->path;
	struct ib_recv_wr wr;
	struct ib_sge list;

	list.addr   = iu->dma_addr;
	list.length = iu->size;
	list.lkey   = path->dev->ib_pd->local_dma_lkey;

	if (list.length == 0) {
		rtrs_wrn(con->path,
			 "Posting receive work request failed, sg list is empty\n");
		return -EINVAL;
	}

	wr = (struct ib_recv_wr) {
		.wr_cqe  = &iu->cqe,
		.sg_list = &list,
		.num_sge = 1,
	};

	return ib_post_recv(con->qp, &wr, NULL);
}
EXPORT_SYMBOL_GPL(rtrs_iu_post_recv);
int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe)
{
	struct ib_recv_wr wr;

	wr = (struct ib_recv_wr) {
		.wr_cqe = cqe,
	};

	return ib_post_recv(con->qp, &wr, NULL);
}
EXPORT_SYMBOL_GPL(rtrs_post_recv_empty);
static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head,
			  struct ib_send_wr *wr, struct ib_send_wr *tail)
{
	if (head) {
		struct ib_send_wr *next = head;

		/* Chain wr to the end of the caller's list */
		while (next->next)
			next = next->next;
		next->next = wr;
	} else {
		head = wr;
	}

	if (tail)
		wr->next = tail;

	return ib_post_send(qp, head, NULL);
}
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
		      struct ib_send_wr *head)
{
	struct rtrs_path *path = con->path;
	struct ib_send_wr wr;
	struct ib_sge list;

	if (WARN_ON(size == 0))
		return -EINVAL;

	list.addr   = iu->dma_addr;
	list.length = size;
	list.lkey   = path->dev->ib_pd->local_dma_lkey;

	wr = (struct ib_send_wr) {
		.wr_cqe     = &iu->cqe,
		.sg_list    = &list,
		.num_sge    = 1,
		.opcode     = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};

	return rtrs_post_send(con->qp, head, &wr, NULL);
}
EXPORT_SYMBOL_GPL(rtrs_iu_post_send);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
				struct ib_sge *sge, unsigned int num_sge,
				u32 rkey, u64 rdma_addr, u32 imm_data,
				enum ib_send_flags flags,
				struct ib_send_wr *head,
				struct ib_send_wr *tail)
{
	struct ib_rdma_wr wr;
	int i;

	wr = (struct ib_rdma_wr) {
		.wr.wr_cqe	= &iu->cqe,
		.wr.sg_list	= sge,
		.wr.num_sge	= num_sge,
		.rkey		= rkey,
		.remote_addr	= rdma_addr,
		.wr.opcode	= IB_WR_RDMA_WRITE_WITH_IMM,
		.wr.ex.imm_data	= cpu_to_be32(imm_data),
		.wr.send_flags	= flags,
	};

	/*
	 * If one of the sges has 0 size, the operation will fail with a
	 * length error.
	 */
	for (i = 0; i < num_sge; i++)
		if (WARN_ONCE(sge[i].length == 0, "sg %d is zero length\n", i))
			return -EINVAL;

	return rtrs_post_send(con->qp, head, &wr.wr, tail);
}
EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm);
static int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con,
					  struct ib_cqe *cqe,
					  u32 imm_data,
					  struct ib_send_wr *head)
{
	struct ib_rdma_wr wr;
	struct rtrs_path *path = con->path;
	enum ib_send_flags sflags;

	atomic_dec_if_positive(&con->sq_wr_avail);
	sflags = (atomic_inc_return(&con->wr_cnt) % path->signal_interval) ?
		  0 : IB_SEND_SIGNALED;

	wr = (struct ib_rdma_wr) {
		.wr.wr_cqe	= cqe,
		.wr.send_flags	= sflags,
		.wr.opcode	= IB_WR_RDMA_WRITE_WITH_IMM,
		.wr.ex.imm_data	= cpu_to_be32(imm_data),
	};

	return rtrs_post_send(con->qp, head, &wr.wr, NULL);
}
static void qp_event_handler(struct ib_event *ev, void *ctx)
{
	struct rtrs_con *con = ctx;

	switch (ev->event) {
	case IB_EVENT_COMM_EST:
		rtrs_info(con->path, "QP event %s (%d) received\n",
			  ib_event_msg(ev->event), ev->event);
		rdma_notify(con->cm_id, IB_EVENT_COMM_EST);
		break;
	default:
		rtrs_info(con->path, "Unhandled QP event %s (%d) received\n",
			  ib_event_msg(ev->event), ev->event);
		break;
	}
}
static bool is_pollqueue(struct rtrs_con *con)
{
	/* Connections past irq_con_num are served by direct polling */
	return con->cid >= con->path->irq_con_num;
}
static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe,
		     enum ib_poll_context poll_ctx)
{
	struct rdma_cm_id *cm_id = con->cm_id;
	struct ib_cq *cq;

	if (is_pollqueue(con))
		cq = ib_alloc_cq(cm_id->device, con, nr_cqe, cq_vector,
				 poll_ctx);
	else
		cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx);

	if (IS_ERR(cq)) {
		rtrs_err(con->path, "Creating completion queue failed, errno: %pe\n",
			 cq);
		return PTR_ERR(cq);
	}
	con->cq = cq;
	con->nr_cqe = nr_cqe;

	return 0;
}
static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
		     u32 max_send_wr, u32 max_recv_wr, u32 max_sge)
{
	struct ib_qp_init_attr init_attr = {};
	struct rdma_cm_id *cm_id = con->cm_id;
	int ret;

	init_attr.cap.max_send_wr = max_send_wr;
	init_attr.cap.max_recv_wr = max_recv_wr;
	init_attr.cap.max_recv_sge = 1;
	init_attr.event_handler = qp_event_handler;
	init_attr.qp_context = con;
	init_attr.cap.max_send_sge = max_sge;

	init_attr.qp_type = IB_QPT_RC;
	init_attr.send_cq = con->cq;
	init_attr.recv_cq = con->cq;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;

	ret = rdma_create_qp(cm_id, pd, &init_attr);
	if (ret)
		rtrs_err(con->path, "Creating QP failed, err: %d\n", ret);
	else
		con->qp = cm_id->qp;

	return ret;
}
static void destroy_cq(struct rtrs_con *con)
{
	if (con->cq) {
		if (is_pollqueue(con))
			ib_free_cq(con->cq);
		else
			ib_cq_pool_put(con->cq, con->nr_cqe);
	}
	con->cq = NULL;
}
int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
		      u32 max_send_sge, int cq_vector, int nr_cqe,
		      u32 max_send_wr, u32 max_recv_wr,
		      enum ib_poll_context poll_ctx)
{
	int err;

	err = create_cq(con, cq_vector, nr_cqe, poll_ctx);
	if (err)
		return err;

	err = create_qp(con, path->dev->ib_pd, max_send_wr, max_recv_wr,
			max_send_sge);
	if (err) {
		destroy_cq(con);
		return err;
	}
	con->path = path;

	return 0;
}
EXPORT_SYMBOL_GPL(rtrs_cq_qp_create);
void rtrs_cq_qp_destroy(struct rtrs_con *con)
{
	if (con->qp) {
		rdma_destroy_qp(con->cm_id);
		con->qp = NULL;
	}
	destroy_cq(con);
}
EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy);
static void schedule_hb(struct rtrs_path *path)
{
	queue_delayed_work(path->hb_wq, &path->hb_dwork,
			   msecs_to_jiffies(path->hb_interval_ms));
}
void rtrs_send_hb_ack(struct rtrs_path *path)
{
	struct rtrs_con *usr_con = path->con[0];
	u32 imm;
	int err;

	imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
	err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
					     NULL);
	if (err) {
		rtrs_err(path, "send HB ACK failed, errno: %d\n", err);
		path->hb_err_handler(usr_con);
	}
}
EXPORT_SYMBOL_GPL(rtrs_send_hb_ack);
static void hb_work(struct work_struct *work)
{
	struct rtrs_con *usr_con;
	struct rtrs_path *path;
	u32 imm;
	int err;

	path = container_of(to_delayed_work(work), typeof(*path), hb_dwork);
	usr_con = path->con[0];

	if (path->hb_missed_cnt > path->hb_missed_max) {
		rtrs_err(path, "HB missed max reached.\n");
		path->hb_err_handler(usr_con);
		return;
	}
	if (path->hb_missed_cnt++) {
		/* Reschedule work without sending hb */
		schedule_hb(path);
		return;
	}

	path->hb_last_sent = ktime_get();

	imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
	err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
					     NULL);
	if (err) {
		rtrs_err(path, "HB send failed, errno: %d\n", err);
		path->hb_err_handler(usr_con);
		return;
	}

	schedule_hb(path);
}
void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq)
{
	path->hb_cqe = cqe;
	path->hb_interval_ms = interval_ms;
	path->hb_err_handler = err_handler;
	path->hb_wq = wq;
	path->hb_missed_max = missed_max;
	path->hb_missed_cnt = 0;
	INIT_DELAYED_WORK(&path->hb_dwork, hb_work);
}
EXPORT_SYMBOL_GPL(rtrs_init_hb);
void rtrs_start_hb(struct rtrs_path *path)
{
	schedule_hb(path);
}
EXPORT_SYMBOL_GPL(rtrs_start_hb);
void rtrs_stop_hb(struct rtrs_path *path)
{
	cancel_delayed_work_sync(&path->hb_dwork);
	path->hb_missed_cnt = 0;
}
EXPORT_SYMBOL_GPL(rtrs_stop_hb);
static int rtrs_str_gid_to_sockaddr(const char *addr, size_t len,
				    short port, struct sockaddr_storage *dst)
{
	struct sockaddr_ib *dst_ib = (struct sockaddr_ib *)dst;
	int ret;

	/*
	 * We can use some of the IPv6 functions since GID is a valid
	 * IPv6 address format.
	 */
	ret = in6_pton(addr, len, dst_ib->sib_addr.sib_raw, '\0', NULL);
	if (ret == 0)
		return -EINVAL;

	dst_ib->sib_family = AF_IB;
	/*
	 * Use the same TCP server port number as the IB service ID
	 * on the IB port space range.
	 */
	dst_ib->sib_sid = cpu_to_be64(RDMA_IB_IP_PS_IB | port);
	dst_ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
	dst_ib->sib_pkey = cpu_to_be16(0xffff);

	return 0;
}
/**
 * rtrs_str_to_sockaddr() - Convert rtrs address string to sockaddr
 * @addr:	String representation of an addr (IPv4, IPv6 or IB GID):
 *		- "ip:192.168.1.1"
 *		- "ip:fe80::200:5aee:feaa:20a2"
 *		- "gid:fe80::200:5aee:feaa:20a2"
 * @len:	String address length
 * @port:	Destination port
 * @dst:	Destination sockaddr structure
 *
 * Returns 0 if conversion successful. Non-zero on error.
 */
static int rtrs_str_to_sockaddr(const char *addr, size_t len,
				u16 port, struct sockaddr_storage *dst)
{
	if (strncmp(addr, "gid:", 4) == 0) {
		return rtrs_str_gid_to_sockaddr(addr + 4, len - 4, port, dst);
	} else if (strncmp(addr, "ip:", 3) == 0) {
		char port_str[8];
		char *cpy;
		int err;

		snprintf(port_str, sizeof(port_str), "%u", port);
		cpy = kstrndup(addr + 3, len - 3, GFP_KERNEL);
		err = cpy ? inet_pton_with_scope(&init_net, AF_UNSPEC,
						 cpy, port_str, dst) : -ENOMEM;
		kfree(cpy);

		return err;
	}
	return -EPROTONOSUPPORT;
}
/**
 * sockaddr_to_str() - convert sockaddr to a string.
 * @addr:	the sockaddr structure to be converted.
 * @buf:	string containing socket addr.
 * @len:	string length.
 *
 * The return value is the number of characters written into buf not
 * including the trailing '\0'. If len is == 0 the function returns 0.
 */
int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len)
{
	switch (addr->sa_family) {
	case AF_IB:
		return scnprintf(buf, len, "gid:%pI6",
				 &((struct sockaddr_ib *)addr)->sib_addr.sib_raw);
	case AF_INET:
		return scnprintf(buf, len, "ip:%pI4",
				 &((struct sockaddr_in *)addr)->sin_addr);
	case AF_INET6:
		return scnprintf(buf, len, "ip:%pI6c",
				 &((struct sockaddr_in6 *)addr)->sin6_addr);
	}
	return scnprintf(buf, len, "<invalid address family>");
}
EXPORT_SYMBOL(sockaddr_to_str);
/**
 * rtrs_addr_to_str() - convert rtrs_addr to a string "src@dst"
 * @addr:	the rtrs_addr structure to be converted
 * @buf:	string containing source and destination addr of a path
 *		separated by '@', i.e. "ip:1.1.1.1@ip:1.1.1.2"
 * @len:	string length
 *
 * The return value is the number of characters written into buf not
 * including the trailing '\0'.
 */
int rtrs_addr_to_str(const struct rtrs_addr *addr, char *buf, size_t len)
{
	int cnt;

	cnt = sockaddr_to_str((struct sockaddr *)addr->src,
			      buf, len);
	cnt += scnprintf(buf + cnt, len - cnt, "@");
	sockaddr_to_str((struct sockaddr *)addr->dst,
			buf + cnt, len - cnt);
	return strlen(buf);
}
EXPORT_SYMBOL(rtrs_addr_to_str);
/**
 * rtrs_addr_to_sockaddr() - convert path string "src,dst" or "src@dst"
 * to sockaddresses
 * @str:	string containing source and destination addr of a path
 *		separated by ',' or '@', i.e. "ip:1.1.1.1,ip:1.1.1.2" or
 *		"ip:1.1.1.1@ip:1.1.1.2". If str contains only one address
 *		it's considered to be the destination.
 * @len:	string length
 * @port:	Destination port number.
 * @addr:	will be set to the source/destination address or to NULL
 *		if str doesn't contain any source address.
 *
 * Returns zero if conversion successful. Non-zero otherwise.
 */
int rtrs_addr_to_sockaddr(const char *str, size_t len, u16 port,
			  struct rtrs_addr *addr)
{
	const char *d;

	d = strchr(str, ',');
	if (!d)
		d = strchr(str, '@');
	if (d) {
		if (rtrs_str_to_sockaddr(str, d - str, 0, addr->src))
			return -EINVAL;
		d += 1;
		len -= d - str;
		str  = d;
	} else {
		addr->src = NULL;
	}
	return rtrs_str_to_sockaddr(str, len, port, addr->dst);
}
EXPORT_SYMBOL(rtrs_addr_to_sockaddr);
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool)
{
	INIT_LIST_HEAD(&pool->list);
	mutex_init(&pool->mutex);
	pool->pd_flags = pd_flags;
}
EXPORT_SYMBOL(rtrs_rdma_dev_pd_init);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool)
{
	mutex_destroy(&pool->mutex);
	WARN_ON(!list_empty(&pool->list));
}
EXPORT_SYMBOL(rtrs_rdma_dev_pd_deinit);
static void dev_free(struct kref *ref)
{
	struct rtrs_rdma_dev_pd *pool;
	struct rtrs_ib_dev *dev;

	dev = container_of(ref, typeof(*dev), ref);
	pool = dev->pool;

	mutex_lock(&pool->mutex);
	list_del(&dev->entry);
	mutex_unlock(&pool->mutex);

	ib_dealloc_pd(dev->ib_pd);
	kfree(dev);
}

int rtrs_ib_dev_put(struct rtrs_ib_dev *dev)
{
	return kref_put(&dev->ref, dev_free);
}
EXPORT_SYMBOL(rtrs_ib_dev_put);
static int rtrs_ib_dev_get(struct rtrs_ib_dev *dev)
{
	return kref_get_unless_zero(&dev->ref);
}
struct rtrs_ib_dev *
rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
			struct rtrs_rdma_dev_pd *pool)
{
	struct rtrs_ib_dev *dev;

	mutex_lock(&pool->mutex);
	list_for_each_entry(dev, &pool->list, entry) {
		if (dev->ib_dev->node_guid == ib_dev->node_guid &&
		    rtrs_ib_dev_get(dev))
			goto out_unlock;
	}
	mutex_unlock(&pool->mutex);
	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		goto out_err;

	kref_init(&dev->ref);
	dev->pool = pool;
	dev->ib_dev = ib_dev;
	dev->ib_pd = ib_alloc_pd(ib_dev, pool->pd_flags);
	if (IS_ERR(dev->ib_pd))
		goto out_free_dev;

	if (pool->ops && pool->ops->init && pool->ops->init(dev))
		goto out_free_pd;

	mutex_lock(&pool->mutex);
	list_add(&dev->entry, &pool->list);
out_unlock:
	mutex_unlock(&pool->mutex);
	return dev;

out_free_pd:
	ib_dealloc_pd(dev->ib_pd);
out_free_dev:
	kfree(dev);
out_err:
	return NULL;
}
EXPORT_SYMBOL(rtrs_ib_dev_find_or_add);