1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
13 #include <linux/uuid.h>
14 #include <rdma/rdma_cm.h>
15 #include <rdma/ib_verbs.h>
20 #define RTRS_PROTO_VER_MAJOR 2
21 #define RTRS_PROTO_VER_MINOR 0
23 #define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
24 __stringify(RTRS_PROTO_VER_MINOR)
27 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
28 * and the minimum chunk size is 4096 (2^12).
29 * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory
30 * since queue_depth in rtrs_msg_conn_rsp is defined as le16.
31 * Therefore the practical max value of sess_queue_depth is
32 * somewhere between 1 and 65535 and it depends on the system.
34 #define MAX_SESS_QUEUE_DEPTH 65535
37 MAX_IMM_TYPE_BITS
= 4,
38 MAX_IMM_TYPE_MASK
= ((1 << MAX_IMM_TYPE_BITS
) - 1),
39 MAX_IMM_PAYL_BITS
= 28,
40 MAX_IMM_PAYL_MASK
= ((1 << MAX_IMM_PAYL_BITS
) - 1),
44 RTRS_IO_REQ_IMM
= 0, /* client to server */
45 RTRS_IO_RSP_IMM
= 1, /* server to client */
46 RTRS_IO_RSP_W_INV_IMM
= 2, /* server to client */
48 RTRS_HB_MSG_IMM
= 8, /* HB: HeartBeat */
55 SERVICE_CON_QUEUE_DEPTH
= 512,
59 MIN_CHUNK_SIZE
= 8192,
61 RTRS_HB_INTERVAL_MS
= 5000,
62 RTRS_HB_MISSED_MAX
= 5,
65 RTRS_PROTO_VER
= (RTRS_PROTO_VER_MAJOR
<< 8) | RTRS_PROTO_VER_MINOR
,
/**
 * struct rtrs_rdma_dev_pd_ops - per-device callbacks
 * @init:   callback taking a &struct rtrs_ib_dev and returning an int status
 * @deinit: callback taking a &struct rtrs_ib_dev, no return value
 *
 * NOTE(review): presumably invoked when a device is added to / removed from
 * a pool; the call sites are not visible in this header - confirm.
 */
struct rtrs_rdma_dev_pd_ops {
	int  (*init)(struct rtrs_ib_dev *dev);
	void (*deinit)(struct rtrs_ib_dev *dev);
};
75 struct rtrs_rdma_dev_pd
{
77 struct list_head list
;
78 enum ib_pd_flags pd_flags
;
79 const struct rtrs_rdma_dev_pd_ops
*ops
;
83 struct ib_device
*ib_dev
;
86 struct list_head entry
;
87 struct rtrs_rdma_dev_pd
*pool
;
88 struct ib_event_handler event_handler
;
92 struct rtrs_path
*path
;
95 struct rdma_cm_id
*cm_id
;
103 struct list_head entry
;
104 struct sockaddr_storage dst_addr
;
105 struct sockaddr_storage src_addr
;
106 char sessname
[NAME_MAX
];
108 struct rtrs_con
**con
;
109 unsigned int con_num
;
110 unsigned int irq_con_num
;
111 unsigned int recon_cnt
;
112 unsigned int signal_interval
;
113 struct rtrs_ib_dev
*dev
;
115 struct ib_cqe
*hb_cqe
;
116 void (*hb_err_handler
)(struct rtrs_con
*con
);
117 struct workqueue_struct
*hb_wq
;
118 struct delayed_work hb_dwork
;
119 unsigned int hb_interval_ms
;
120 unsigned int hb_missed_cnt
;
121 unsigned int hb_missed_max
;
122 ktime_t hb_last_sent
;
123 ktime_t hb_cur_latency
;
126 /* rtrs information unit */
132 enum dma_data_direction direction
;
136 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
137 * @RTRS_MSG_INFO_REQ: Client additional info request to the server
138 * @RTRS_MSG_INFO_RSP: Server additional info response to the client
139 * @RTRS_MSG_WRITE: Client writes data per RDMA to server
140 * @RTRS_MSG_READ: Client requests data transfer from server
141 * @RTRS_MSG_RKEY_RSP: Server refreshed rkey for rbuf
143 enum rtrs_msg_types
{
152 * enum rtrs_msg_flags - RTRS message flags.
153 * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response.
154 * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response.
enum rtrs_msg_flags {
	/* bit 0: send invalidation in response */
	RTRS_MSG_NEED_INVAL_F	= (1 << 0),
	/* bit 1: send refreshed rkey in response */
	RTRS_MSG_NEW_RKEY_F	= (1 << 1),
};
162 * struct rtrs_sg_desc - RDMA-Buffer entry description
163 * @addr: Address of RDMA destination buffer
164 * @key: Authorization rkey to write to the buffer
165 * @len: Size of the buffer
167 struct rtrs_sg_desc
{
174 * struct rtrs_msg_conn_req - Client connection request to the server
176 * @version: RTRS protocol version
177 * @cid: Current connection id
178 * @cid_num: Number of connections per session
179 * @recon_cnt: Reconnections counter
180 * @sess_uuid: UUID of a session (path)
181 * @paths_uuid: UUID of a group of sessions (paths)
183 * NOTE: max size 56 bytes, see man rdma_connect().
185 struct rtrs_msg_conn_req
{
186 /* Is set to 0 by cma.c in case of AF_IB, do not touch that.
187 * see https://www.spinics.net/lists/linux-rdma/msg22397.html
190 /* On sender side that should be set to 0, or cma_save_ip_info()
191 * extract garbage and will fail.
202 u8 reserved_bits
: 7;
207 * struct rtrs_msg_conn_rsp - Server connection response to the client
209 * @version: RTRS protocol version
210 * @errno: If rdma_accept() then 0, if rdma_reject() indicates error
211 * @queue_depth: max inflight messages (queue-depth) in this session
212 * @max_io_size: max io size server supports
213 * @max_hdr_size: max msg header size server supports
215 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
217 struct rtrs_msg_conn_rsp
{
229 * struct rtrs_msg_info_req
230 * @type: @RTRS_MSG_INFO_REQ
231 * @pathname: Path name chosen by client
233 struct rtrs_msg_info_req
{
235 u8 pathname
[NAME_MAX
];
240 * struct rtrs_msg_info_rsp
241 * @type: @RTRS_MSG_INFO_RSP
242 * @sg_cnt: Number of @desc entries
243 * @desc: RDMA buffers where the client can write to server
245 struct rtrs_msg_info_rsp
{
249 struct rtrs_sg_desc desc
[];
253 * struct rtrs_msg_rkey_rsp
254 * @type: @RTRS_MSG_RKEY_RSP
255 * @buf_id: RDMA buf_id of the new rkey
256 * @rkey: new remote key for RDMA buffers id from server
258 struct rtrs_msg_rkey_rsp
{
265 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
266 * @type: always @RTRS_MSG_READ
267 * @usr_len: length of user payload
268 * @sg_cnt: number of @desc entries
269 * @desc: RDMA buffers where the server can write the result to
271 struct rtrs_msg_rdma_read
{
276 struct rtrs_sg_desc desc
[];
280 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
281 * @type: always @RTRS_MSG_WRITE
282 * @usr_len: length of user payload
284 struct rtrs_msg_rdma_write
{
290 * struct rtrs_msg_rdma_hdr - header for read or write request
291 * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
293 struct rtrs_msg_rdma_hdr
{
299 struct rtrs_iu
*rtrs_iu_alloc(u32 queue_num
, size_t size
, gfp_t t
,
300 struct ib_device
*dev
, enum dma_data_direction
,
301 void (*done
)(struct ib_cq
*cq
, struct ib_wc
*wc
));
302 void rtrs_iu_free(struct rtrs_iu
*iu
, struct ib_device
*dev
, u32 queue_num
);
303 int rtrs_iu_post_recv(struct rtrs_con
*con
, struct rtrs_iu
*iu
);
304 int rtrs_iu_post_send(struct rtrs_con
*con
, struct rtrs_iu
*iu
, size_t size
,
305 struct ib_send_wr
*head
);
306 int rtrs_iu_post_rdma_write_imm(struct rtrs_con
*con
, struct rtrs_iu
*iu
,
307 struct ib_sge
*sge
, unsigned int num_sge
,
308 u32 rkey
, u64 rdma_addr
, u32 imm_data
,
309 enum ib_send_flags flags
,
310 struct ib_send_wr
*head
,
311 struct ib_send_wr
*tail
);
/* Declaration only; takes a connection and a completion-queue entry. */
int rtrs_post_recv_empty(struct rtrs_con *con,
			 struct ib_cqe *cqe);
315 int rtrs_cq_qp_create(struct rtrs_path
*path
, struct rtrs_con
*con
,
316 u32 max_send_sge
, int cq_vector
, int nr_cqe
,
317 u32 max_send_wr
, u32 max_recv_wr
,
318 enum ib_poll_context poll_ctx
);
319 void rtrs_cq_qp_destroy(struct rtrs_con
*con
);
/* Heartbeat (HB) entry points; all operate on a struct rtrs_path. */
void rtrs_init_hb(struct rtrs_path *path,
		  struct ib_cqe *cqe,
		  unsigned int interval_ms,
		  unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);

void rtrs_start_hb(struct rtrs_path *path);

void rtrs_stop_hb(struct rtrs_path *path);

void rtrs_send_hb_ack(struct rtrs_path *path);
/* Init/deinit pair for a struct rtrs_rdma_dev_pd pool; defined elsewhere. */
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);

void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);
/* struct rtrs_ib_dev lookup-or-add and put; defined outside this header. */
struct rtrs_ib_dev *
rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
			struct rtrs_rdma_dev_pd *pool);

int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
337 static inline u32
rtrs_to_imm(u32 type
, u32 payload
)
339 BUILD_BUG_ON(MAX_IMM_PAYL_BITS
+ MAX_IMM_TYPE_BITS
!= 32);
340 BUILD_BUG_ON(RTRS_LAST_IMM
> (1<<MAX_IMM_TYPE_BITS
));
341 return ((type
& MAX_IMM_TYPE_MASK
) << MAX_IMM_PAYL_BITS
) |
342 (payload
& MAX_IMM_PAYL_MASK
);
345 static inline void rtrs_from_imm(u32 imm
, u32
*type
, u32
*payload
)
347 *payload
= imm
& MAX_IMM_PAYL_MASK
;
348 *type
= imm
>> MAX_IMM_PAYL_BITS
;
351 static inline u32
rtrs_to_io_req_imm(u32 addr
)
353 return rtrs_to_imm(RTRS_IO_REQ_IMM
, addr
);
356 static inline u32
rtrs_to_io_rsp_imm(u32 msg_id
, int errno
, bool w_inval
)
358 enum rtrs_imm_type type
;
361 /* 9 bits for errno, 19 bits for msg_id */
362 payload
= (abs(errno
) & 0x1ff) << 19 | (msg_id
& 0x7ffff);
363 type
= w_inval
? RTRS_IO_RSP_W_INV_IMM
: RTRS_IO_RSP_IMM
;
365 return rtrs_to_imm(type
, payload
);
368 static inline void rtrs_from_io_rsp_imm(u32 payload
, u32
*msg_id
, int *errno
)
370 /* 9 bits for errno, 19 bits for msg_id */
371 *msg_id
= payload
& 0x7ffff;
372 *errno
= -(int)((payload
>> 19) & 0x1ff);
375 #define STAT_STORE_FUNC(type, set_value, reset) \
376 static ssize_t set_value##_store(struct kobject *kobj, \
377 struct kobj_attribute *attr, \
378 const char *buf, size_t count) \
381 type *stats = container_of(kobj, type, kobj_stats); \
383 if (sysfs_streq(buf, "1")) \
384 ret = reset(stats, true); \
385 else if (sysfs_streq(buf, "0")) \
386 ret = reset(stats, false); \
393 #define STAT_SHOW_FUNC(type, get_value, print) \
394 static ssize_t get_value##_show(struct kobject *kobj, \
395 struct kobj_attribute *attr, \
398 type *stats = container_of(kobj, type, kobj_stats); \
400 return print(stats, page); \
/*
 * STAT_ATTR() - expand STAT_STORE_FUNC() and STAT_SHOW_FUNC() for @stat
 * (yielding <stat>_store() and <stat>_show()), then define a
 * struct kobj_attribute named <stat>_attr via __ATTR_RW(stat).
 * The caller supplies the terminating semicolon.
 */
#define STAT_ATTR(type, stat, print, reset)				\
	STAT_STORE_FUNC(type, stat, reset)				\
	STAT_SHOW_FUNC(type, stat, print)				\
	static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
408 #endif /* RTRS_PRI_H */