/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Definitions for the SMC module (socket related)
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
#ifndef __SMC_H
#define __SMC_H

#include <linux/socket.h>
#include <linux/types.h>
#include <linux/compiler.h> /* __aligned */
#include <net/genetlink.h>
#include <net/sock.h>

#include "smc_ib.h"

#define SMC_V1		1		/* SMC version V1 */
#define SMC_V2		2		/* SMC version V2 */

#define SMC_RELEASE_0 0
#define SMC_RELEASE_1 1
#define SMC_RELEASE	SMC_RELEASE_1 /* the latest release version */

#define SMCPROTO_SMC		0	/* SMC protocol, IPv4 */
#define SMCPROTO_SMC6		1	/* SMC protocol, IPv6 */

#define SMC_AUTOCORKING_DEFAULT_SIZE	0x10000	/* 64K by default */

extern struct proto smc_proto;
extern struct proto smc_proto6;

extern struct smc_hashinfo smc_v4_hashinfo;
extern struct smc_hashinfo smc_v6_hashinfo;

int smc_hash_sk(struct sock *sk);
void smc_unhash_sk(struct sock *sk);
void smc_release_cb(struct sock *sk);

int smc_release(struct socket *sock);
int smc_bind(struct socket *sock, struct sockaddr *uaddr,
	     int addr_len);
int smc_connect(struct socket *sock, struct sockaddr *addr,
		int alen, int flags);
int smc_accept(struct socket *sock, struct socket *new_sock,
	       struct proto_accept_arg *arg);
int smc_getname(struct socket *sock, struct sockaddr *addr,
		int peer);
__poll_t smc_poll(struct file *file, struct socket *sock,
		  poll_table *wait);
int smc_ioctl(struct socket *sock, unsigned int cmd,
	      unsigned long arg);
int smc_listen(struct socket *sock, int backlog);
int smc_shutdown(struct socket *sock, int how);
int smc_setsockopt(struct socket *sock, int level, int optname,
		   sockptr_t optval, unsigned int optlen);
int smc_getsockopt(struct socket *sock, int level, int optname,
		   char __user *optval, int __user *optlen);
int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		int flags);
ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags);

/* smc sock initialization */
void smc_sk_init(struct net *net, struct sock *sk, int protocol);
/* clcsock initialization */
int smc_create_clcsk(struct net *net, struct sock *sk, int family);

#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
#endif

enum smc_state {		/* possible states of an SMC socket */
	SMC_ACTIVE	= 1,
	SMC_INIT	= 2,
	SMC_CLOSED	= 7,
	SMC_LISTEN	= 10,
	/* normal close */
	SMC_PEERCLOSEWAIT1	= 20,
	SMC_PEERCLOSEWAIT2	= 21,
	SMC_APPFINCLOSEWAIT	= 24,
	SMC_APPCLOSEWAIT1	= 22,
	SMC_APPCLOSEWAIT2	= 23,
	SMC_PEERFINCLOSEWAIT	= 25,
	/* abnormal close */
	SMC_PEERABORTWAIT	= 26,
	SMC_PROCESSABORT	= 27,
};

enum smc_supplemental_features {
	SMC_SPF_EMULATED_ISM_DEV	= 0,
};

#define SMC_FEATURE_MASK \
	(BIT(SMC_SPF_EMULATED_ISM_DEV))

struct smc_link_group;

struct smc_wr_rx_hdr {	/* common prefix part of LLC and CDC to demultiplex */
	union {
		u8 type;
#if defined(__BIG_ENDIAN_BITFIELD)
		struct {
			u8 llc_version:4,
			   llc_type:4;
		};
#elif defined(__LITTLE_ENDIAN_BITFIELD)
		struct {
			u8 llc_type:4,
			   llc_version:4;
		};
#endif
	};
} __aligned(1);

struct smc_cdc_conn_state_flags {
#if defined(__BIG_ENDIAN_BITFIELD)
	u8	peer_done_writing : 1;	/* Sending done indicator */
	u8	peer_conn_closed : 1;	/* Peer connection closed indicator */
	u8	peer_conn_abort : 1;	/* Abnormal close indicator */
	u8	reserved : 5;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8	reserved : 5;
	u8	peer_conn_abort : 1;
	u8	peer_conn_closed : 1;
	u8	peer_done_writing : 1;
#endif
};

struct smc_cdc_producer_flags {
#if defined(__BIG_ENDIAN_BITFIELD)
	u8	write_blocked : 1;	/* Writing Blocked, no rx buf space */
	u8	urg_data_pending : 1;	/* Urgent Data Pending */
	u8	urg_data_present : 1;	/* Urgent Data Present */
	u8	cons_curs_upd_req : 1;	/* cursor update requested */
	u8	failover_validation : 1;/* message replay due to failover */
	u8	reserved : 3;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
	u8	reserved : 3;
	u8	failover_validation : 1;
	u8	cons_curs_upd_req : 1;
	u8	urg_data_present : 1;
	u8	urg_data_pending : 1;
	u8	write_blocked : 1;
#endif
};

/* in host byte order */
union smc_host_cursor {	/* SMC cursor - an offset in an RMBE */
	struct {
		u16	reserved;
		u16	wrap;		/* window wrap sequence number */
		u32	count;		/* cursor (= offset) part */
	};
#ifdef KERNEL_HAS_ATOMIC64
	atomic64_t		acurs;	/* for atomic processing */
#else
	u64			acurs;	/* for atomic processing */
#endif
} __aligned(8);
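
/*
 * Illustrative sketch only: with KERNEL_HAS_ATOMIC64 the whole cursor can be
 * snapshotted through the overlaid acurs member, so a copy helper along these
 * lines needs no cursor lock; example_curs_copy() is a hypothetical name used
 * purely for illustration here:
 *
 *	static inline void example_curs_copy(union smc_host_cursor *tgt,
 *					     union smc_host_cursor *src)
 *	{
 *		atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
 *	}
 *
 * Without atomic64 support the cursors are instead protected by the
 * connection's acurs_lock spinlock (see struct smc_connection below).
 */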

/* in host byte order, except for flag bitfields in network byte order */
struct smc_host_cdc_msg {		/* Connection Data Control message */
	struct smc_wr_rx_hdr		common; /* .type = 0xFE */
	u8				len;	/* length = 44 */
	u16				seqno;	/* connection seq # */
	u32				token;	/* alert_token */
	union smc_host_cursor		prod;		/* producer cursor */
	union smc_host_cursor		cons;		/* consumer cursor,
							 * piggy backed "ack"
							 */
	struct smc_cdc_producer_flags	prod_flags;	/* conn. tx/rx status */
	struct smc_cdc_conn_state_flags	conn_state_flags; /* peer conn. status*/
	u8				reserved[18];
} __aligned(8);

enum smc_urg_state {
	SMC_URG_VALID	= 1,			/* data present */
	SMC_URG_NOTYET	= 2,			/* data pending */
	SMC_URG_READ	= 3,			/* data was already read */
};

struct smc_mark_woken {
	bool woken;
	void *key;
	wait_queue_entry_t wait_entry;
};

struct smc_connection {
	struct rb_node		alert_node;
	struct smc_link_group	*lgr;		/* link group of connection */
	struct smc_link		*lnk;		/* assigned SMC-R link */
	u32			alert_token_local; /* unique conn. id */
	u8			peer_rmbe_idx;	/* from tcp handshake */
	int			peer_rmbe_size;	/* size of peer rx buffer */
	atomic_t		peer_rmbe_space;/* remaining free bytes in peer
						 * rmbe
						 */
	int			rtoken_idx;	/* idx to peer RMB rkey/addr */

	struct smc_buf_desc	*sndbuf_desc;	/* send buffer descriptor */
	struct smc_buf_desc	*rmb_desc;	/* RMBE descriptor */
	int			rmbe_size_comp;	/* compressed notation */
	int			rmbe_update_limit;
						/* lower limit for consumer
						 * cursor update
						 */

	struct smc_host_cdc_msg	local_tx_ctrl;	/* host byte order staging
						 * buffer for CDC msg send
						 * .prod cf. TCP snd_nxt
						 * .cons cf. TCP sends ack
						 */
	union smc_host_cursor	local_tx_ctrl_fin;
						/* prod crsr - confirmed by peer
						 */
	union smc_host_cursor	tx_curs_prep;	/* tx - prepared data
						 * snd_max..wmem_alloc
						 */
	union smc_host_cursor	tx_curs_sent;	/* tx - sent data
						 * snd_nxt ?
						 */
	union smc_host_cursor	tx_curs_fin;	/* tx - confirmed by peer
						 * snd-wnd-begin ?
						 */
	atomic_t		sndbuf_space;	/* remaining space in sndbuf */
	u16			tx_cdc_seq;	/* sequence # for CDC send */
	u16			tx_cdc_seq_fin;	/* sequence # - tx completed */
	spinlock_t		send_lock;	/* protect wr_sends */
	atomic_t		cdc_pend_tx_wr;	/* number of pending tx CDC wqe
						 * - inc when post wqe,
						 * - dec on polled tx cqe
						 */
	wait_queue_head_t	cdc_pend_tx_wq;	/* wakeup on no cdc_pend_tx_wr*/
	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
	u32			tx_off;		/* base offset in peer rmb */

	struct smc_host_cdc_msg	local_rx_ctrl;	/* filled during event_handl.
						 * .prod cf. TCP rcv_nxt
						 * .cons cf. TCP snd_una
						 */
	union smc_host_cursor	rx_curs_confirmed; /* confirmed to peer
						    * source of snd_una ?
						    */
	union smc_host_cursor	urg_curs;	/* points at urgent byte */
	enum smc_urg_state	urg_state;
	bool			urg_tx_pend;	/* urgent data staged */
	bool			urg_rx_skip_pend;
						/* indicate urgent oob data
						 * read, but previous regular
						 * data still pending
						 */
	char			urg_rx_byte;	/* urgent byte */
	bool			tx_in_release_sock;
						/* flush pending tx data in
						 * sock release_cb()
						 */
	atomic_t		bytes_to_rcv;	/* arrived data,
						 * not yet received
						 */
	atomic_t		splice_pending;	/* number of spliced bytes
						 * pending processing
						 */
#ifndef KERNEL_HAS_ATOMIC64
	spinlock_t		acurs_lock;	/* protect cursors */
#endif
	struct work_struct	close_work;	/* peer sent some closing */
	struct work_struct	abort_work;	/* abort the connection */
	struct tasklet_struct	rx_tsklet;	/* Receiver tasklet for SMC-D */
	u8			rx_off;		/* receive offset:
						 * 0 for SMC-R, 32 for SMC-D
						 */
	u64			peer_token;	/* SMC-D token of peer */
	u8			killed : 1;	/* abnormal termination */
	u8			freed : 1;	/* normal termination */
	u8			out_of_sync : 1; /* out of sync with peer */
};

struct smc_sock {				/* smc sock container */
	struct sock		sk;
#if IS_ENABLED(CONFIG_IPV6)
	struct ipv6_pinfo	*pinet6;
#endif
	struct socket		*clcsock;	/* internal tcp socket */
	void			(*clcsk_state_change)(struct sock *sk);
						/* original state_change fct. */
	void			(*clcsk_data_ready)(struct sock *sk);
						/* original data_ready fct. */
	void			(*clcsk_write_space)(struct sock *sk);
						/* original write_space fct. */
	void			(*clcsk_error_report)(struct sock *sk);
						/* original error_report fct. */
	struct smc_connection	conn;		/* smc connection */
	struct smc_sock		*listen_smc;	/* listen parent */
	struct work_struct	connect_work;	/* handle non-blocking connect*/
	struct work_struct	tcp_listen_work;/* handle tcp socket accepts */
	struct work_struct	smc_listen_work;/* prepare new accept socket */
	struct list_head	accept_q;	/* sockets to be accepted */
	spinlock_t		accept_q_lock;	/* protects accept_q */
	bool			limit_smc_hs;	/* put constraint on handshake */
	bool			use_fallback;	/* fallback to tcp */
	int			fallback_rsn;	/* reason for fallback */
	u32			peer_diagnosis;	/* decline reason from peer */
	atomic_t		queued_smc_hs;	/* queued smc handshakes */
	struct inet_connection_sock_af_ops		af_ops;
	const struct inet_connection_sock_af_ops	*ori_af_ops;
						/* original af ops */
	int			sockopt_defer_accept;
						/* sockopt TCP_DEFER_ACCEPT
						 * value
						 */
	u8			wait_close_tx_prepared : 1;
						/* shutdown wr or close
						 * started, waiting for unsent
						 * data to be sent
						 */
	u8			connect_nonblock : 1;
						/* non-blocking connect in
						 * flight
						 */
	struct mutex		clcsock_release_lock;
						/* protects clcsock of a listen
						 * socket
						 */
};

#define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk)

static inline void smc_init_saved_callbacks(struct smc_sock *smc)
{
	smc->clcsk_state_change	= NULL;
	smc->clcsk_data_ready	= NULL;
	smc->clcsk_write_space	= NULL;
	smc->clcsk_error_report	= NULL;
}

static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
{
	return (struct smc_sock *)
	       ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
}

/* save target_cb in saved_cb, and replace target_cb with new_cb */
static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
					  void (*new_cb)(struct sock *),
					  void (**saved_cb)(struct sock *))
{
	/* only save once */
	if (!*saved_cb)
		*saved_cb = *target_cb;
	*target_cb = new_cb;
}

/* restore target_cb to saved_cb, and reset saved_cb to NULL */
static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
					  void (**saved_cb)(struct sock *))
{
	if (!*saved_cb)
		return;
	*target_cb = *saved_cb;
	*saved_cb = NULL;
}

extern struct workqueue_struct	*smc_hs_wq;	/* wq for handshake work */
extern struct workqueue_struct	*smc_close_wq;	/* wq for close work */

#define SMC_SYSTEMID_LEN		8

extern u8	local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */

#define ntohll(x) be64_to_cpu(x)
#define htonll(x) cpu_to_be64(x)

/* convert an u32 value into network byte order, store it into a 3 byte field */
static inline void hton24(u8 *net, u32 host)
{
	__be32 t;

	t = cpu_to_be32(host);
	memcpy(net, ((u8 *)&t) + 1, 3);
}

/* convert a received 3 byte field into host byte order*/
static inline u32 ntoh24(u8 *net)
{
	__be32 t = 0;

	memcpy(((u8 *)&t) + 1, net, 3);
	return be32_to_cpu(t);
}
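
/*
 * Illustrative sketch only: hton24()/ntoh24() round-trip a host u32 through a
 * 3 byte wire field (values must fit in 24 bits), as used for instance for QP
 * numbers in the handshake messages; "wire" and "qpn" below are hypothetical
 * local variables used purely for illustration:
 *
 *	u8 wire[3];
 *	u32 qpn = 0x012345;
 *
 *	hton24(wire, qpn);		// wire[] = { 0x01, 0x23, 0x45 }
 *	WARN_ON(ntoh24(wire) != qpn);	// restores the original value
 */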

#ifdef CONFIG_XFRM
static inline bool using_ipsec(struct smc_sock *smc)
{
	return (smc->clcsock->sk->sk_policy[0] ||
		smc->clcsock->sk->sk_policy[1]) ? true : false;
}
#else
static inline bool using_ipsec(struct smc_sock *smc)
{
	return false;
}
#endif

struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);
void smc_fill_gid_list(struct smc_link_group *lgr,
		       struct smc_gidlist *gidlist,
		       struct smc_ib_device *known_dev, u8 *known_gid);

/* smc handshake limitation interface for netlink */
int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb);
int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info);
int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info);

static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag)
{
	set_bit(flag, &sk->sk_flags);
}

#endif	/* __SMC_H */