1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * CLC (connection layer control) handshake over initial TCP socket to
6 * prepare for RDMA traffic
8 * Copyright IBM Corp. 2016
10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
14 #include <linux/if_ether.h>
15 #include <linux/sched/signal.h>
25 /* check if received message has a correct header length and contains valid
26 * heading and trailing eyecatchers
28 static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr
*clcm
)
30 struct smc_clc_msg_proposal_prefix
*pclc_prfx
;
31 struct smc_clc_msg_accept_confirm
*clc
;
32 struct smc_clc_msg_proposal
*pclc
;
33 struct smc_clc_msg_decline
*dclc
;
34 struct smc_clc_msg_trail
*trl
;
36 if (memcmp(clcm
->eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
)))
39 case SMC_CLC_PROPOSAL
:
40 pclc
= (struct smc_clc_msg_proposal
*)clcm
;
41 pclc_prfx
= smc_clc_proposal_get_prefix(pclc
);
42 if (ntohs(pclc
->hdr
.length
) !=
43 sizeof(*pclc
) + ntohs(pclc
->iparea_offset
) +
45 pclc_prfx
->ipv6_prefixes_cnt
*
46 sizeof(struct smc_clc_ipv6_prefix
) +
49 trl
= (struct smc_clc_msg_trail
*)
50 ((u8
*)pclc
+ ntohs(pclc
->hdr
.length
) - sizeof(*trl
));
54 clc
= (struct smc_clc_msg_accept_confirm
*)clcm
;
55 if (ntohs(clc
->hdr
.length
) != sizeof(*clc
))
60 dclc
= (struct smc_clc_msg_decline
*)clcm
;
61 if (ntohs(dclc
->hdr
.length
) != sizeof(*dclc
))
68 if (memcmp(trl
->eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
)))
73 /* Wait for data on the tcp-socket, analyze received data
75 * 0 if success and it was not a decline that we received.
76 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
77 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
79 int smc_clc_wait_msg(struct smc_sock
*smc
, void *buf
, int buflen
,
82 struct sock
*clc_sk
= smc
->clcsock
->sk
;
83 struct smc_clc_msg_hdr
*clcm
= buf
;
84 struct msghdr msg
= {NULL
, 0};
86 struct kvec vec
= {buf
, buflen
};
90 /* peek the first few bytes to determine length of data to receive
91 * so we don't consume any subsequent CLC message or payload data
92 * in the TCP byte stream
95 * Caller must make sure that buflen is no less than
96 * sizeof(struct smc_clc_msg_hdr)
98 krflags
= MSG_PEEK
| MSG_WAITALL
;
99 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
100 iov_iter_kvec(&msg
.msg_iter
, READ
| ITER_KVEC
, &vec
, 1,
101 sizeof(struct smc_clc_msg_hdr
));
102 len
= sock_recvmsg(smc
->clcsock
, &msg
, krflags
);
103 if (signal_pending(current
)) {
104 reason_code
= -EINTR
;
105 clc_sk
->sk_err
= EINTR
;
106 smc
->sk
.sk_err
= EINTR
;
109 if (clc_sk
->sk_err
) {
110 reason_code
= -clc_sk
->sk_err
;
111 smc
->sk
.sk_err
= clc_sk
->sk_err
;
114 if (!len
) { /* peer has performed orderly shutdown */
115 smc
->sk
.sk_err
= ECONNRESET
;
116 reason_code
= -ECONNRESET
;
120 smc
->sk
.sk_err
= -len
;
124 datlen
= ntohs(clcm
->length
);
125 if ((len
< sizeof(struct smc_clc_msg_hdr
)) ||
127 ((clcm
->type
!= SMC_CLC_DECLINE
) &&
128 (clcm
->type
!= expected_type
))) {
129 smc
->sk
.sk_err
= EPROTO
;
130 reason_code
= -EPROTO
;
134 /* receive the complete CLC message */
135 memset(&msg
, 0, sizeof(struct msghdr
));
136 iov_iter_kvec(&msg
.msg_iter
, READ
| ITER_KVEC
, &vec
, 1, buflen
);
137 krflags
= MSG_WAITALL
;
138 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
139 len
= sock_recvmsg(smc
->clcsock
, &msg
, krflags
);
140 if (len
< datlen
|| !smc_clc_msg_hdr_valid(clcm
)) {
141 smc
->sk
.sk_err
= EPROTO
;
142 reason_code
= -EPROTO
;
145 if (clcm
->type
== SMC_CLC_DECLINE
) {
146 reason_code
= SMC_CLC_DECL_REPLY
;
147 if (((struct smc_clc_msg_decline
*)buf
)->hdr
.flag
) {
148 smc
->conn
.lgr
->sync_err
= true;
149 smc_lgr_terminate(smc
->conn
.lgr
);
157 /* send CLC DECLINE message across internal TCP socket */
158 int smc_clc_send_decline(struct smc_sock
*smc
, u32 peer_diag_info
)
160 struct smc_clc_msg_decline dclc
;
165 memset(&dclc
, 0, sizeof(dclc
));
166 memcpy(dclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
167 dclc
.hdr
.type
= SMC_CLC_DECLINE
;
168 dclc
.hdr
.length
= htons(sizeof(struct smc_clc_msg_decline
));
169 dclc
.hdr
.version
= SMC_CLC_V1
;
170 dclc
.hdr
.flag
= (peer_diag_info
== SMC_CLC_DECL_SYNCERR
) ? 1 : 0;
171 memcpy(dclc
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
172 dclc
.peer_diagnosis
= htonl(peer_diag_info
);
173 memcpy(dclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
175 memset(&msg
, 0, sizeof(msg
));
176 vec
.iov_base
= &dclc
;
177 vec
.iov_len
= sizeof(struct smc_clc_msg_decline
);
178 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1,
179 sizeof(struct smc_clc_msg_decline
));
180 if (len
< sizeof(struct smc_clc_msg_decline
))
181 smc
->sk
.sk_err
= EPROTO
;
183 smc
->sk
.sk_err
= -len
;
184 return sock_error(&smc
->sk
);
187 /* send CLC PROPOSAL message across internal TCP socket */
188 int smc_clc_send_proposal(struct smc_sock
*smc
,
189 struct smc_ib_device
*smcibdev
,
192 struct smc_clc_msg_proposal_prefix pclc_prfx
;
193 struct smc_clc_msg_proposal pclc
;
194 struct smc_clc_msg_trail trl
;
200 /* send SMC Proposal CLC message */
201 plen
= sizeof(pclc
) + sizeof(pclc_prfx
) + sizeof(trl
);
202 memset(&pclc
, 0, sizeof(pclc
));
203 memcpy(pclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
204 pclc
.hdr
.type
= SMC_CLC_PROPOSAL
;
205 pclc
.hdr
.length
= htons(plen
);
206 pclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
207 memcpy(pclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
208 memcpy(&pclc
.lcl
.gid
, &smcibdev
->gid
[ibport
- 1], SMC_GID_SIZE
);
209 memcpy(&pclc
.lcl
.mac
, &smcibdev
->mac
[ibport
- 1], ETH_ALEN
);
210 pclc
.iparea_offset
= htons(0);
212 memset(&pclc_prfx
, 0, sizeof(pclc_prfx
));
213 /* determine subnet and mask from internal TCP socket */
214 rc
= smc_netinfo_by_tcpsk(smc
->clcsock
, &pclc_prfx
.outgoing_subnet
,
215 &pclc_prfx
.prefix_len
);
217 return SMC_CLC_DECL_CNFERR
; /* configuration error */
218 pclc_prfx
.ipv6_prefixes_cnt
= 0;
219 memcpy(trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
220 memset(&msg
, 0, sizeof(msg
));
221 vec
[0].iov_base
= &pclc
;
222 vec
[0].iov_len
= sizeof(pclc
);
223 vec
[1].iov_base
= &pclc_prfx
;
224 vec
[1].iov_len
= sizeof(pclc_prfx
);
225 vec
[2].iov_base
= &trl
;
226 vec
[2].iov_len
= sizeof(trl
);
227 /* due to the few bytes needed for clc-handshake this cannot block */
228 len
= kernel_sendmsg(smc
->clcsock
, &msg
, vec
, 3, plen
);
229 if (len
< sizeof(pclc
)) {
231 reason_code
= -ENETUNREACH
;
232 smc
->sk
.sk_err
= -reason_code
;
234 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
235 reason_code
= -smc
->sk
.sk_err
;
242 /* send CLC CONFIRM message across internal TCP socket */
243 int smc_clc_send_confirm(struct smc_sock
*smc
)
245 struct smc_connection
*conn
= &smc
->conn
;
246 struct smc_clc_msg_accept_confirm cclc
;
247 struct smc_link
*link
;
253 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
254 /* send SMC Confirm CLC msg */
255 memset(&cclc
, 0, sizeof(cclc
));
256 memcpy(cclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
257 cclc
.hdr
.type
= SMC_CLC_CONFIRM
;
258 cclc
.hdr
.length
= htons(sizeof(cclc
));
259 cclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
260 memcpy(cclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
261 memcpy(&cclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
263 memcpy(&cclc
.lcl
.mac
, &link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
264 hton24(cclc
.qpn
, link
->roce_qp
->qp_num
);
266 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
267 cclc
.conn_idx
= 1; /* for now: 1 RMB = 1 RMBE */
268 cclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
269 cclc
.qp_mtu
= min(link
->path_mtu
, link
->peer_mtu
);
270 cclc
.rmbe_size
= conn
->rmbe_size_short
;
271 cclc
.rmb_dma_addr
= cpu_to_be64(
272 (u64
)sg_dma_address(conn
->rmb_desc
->sgt
[SMC_SINGLE_LINK
].sgl
));
273 hton24(cclc
.psn
, link
->psn_initial
);
275 memcpy(cclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
277 memset(&msg
, 0, sizeof(msg
));
278 vec
.iov_base
= &cclc
;
279 vec
.iov_len
= sizeof(cclc
);
280 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(cclc
));
281 if (len
< sizeof(cclc
)) {
283 reason_code
= -ENETUNREACH
;
284 smc
->sk
.sk_err
= -reason_code
;
286 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
287 reason_code
= -smc
->sk
.sk_err
;
293 /* send CLC ACCEPT message across internal TCP socket */
294 int smc_clc_send_accept(struct smc_sock
*new_smc
, int srv_first_contact
)
296 struct smc_connection
*conn
= &new_smc
->conn
;
297 struct smc_clc_msg_accept_confirm aclc
;
298 struct smc_link
*link
;
304 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
305 memset(&aclc
, 0, sizeof(aclc
));
306 memcpy(aclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
307 aclc
.hdr
.type
= SMC_CLC_ACCEPT
;
308 aclc
.hdr
.length
= htons(sizeof(aclc
));
309 aclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
310 if (srv_first_contact
)
312 memcpy(aclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
313 memcpy(&aclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
315 memcpy(&aclc
.lcl
.mac
, link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
316 hton24(aclc
.qpn
, link
->roce_qp
->qp_num
);
318 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
319 aclc
.conn_idx
= 1; /* as long as 1 RMB = 1 RMBE */
320 aclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
321 aclc
.qp_mtu
= link
->path_mtu
;
322 aclc
.rmbe_size
= conn
->rmbe_size_short
,
323 aclc
.rmb_dma_addr
= cpu_to_be64(
324 (u64
)sg_dma_address(conn
->rmb_desc
->sgt
[SMC_SINGLE_LINK
].sgl
));
325 hton24(aclc
.psn
, link
->psn_initial
);
326 memcpy(aclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
328 memset(&msg
, 0, sizeof(msg
));
329 vec
.iov_base
= &aclc
;
330 vec
.iov_len
= sizeof(aclc
);
331 len
= kernel_sendmsg(new_smc
->clcsock
, &msg
, &vec
, 1, sizeof(aclc
));
332 if (len
< sizeof(aclc
)) {
334 new_smc
->sk
.sk_err
= EPROTO
;
336 new_smc
->sk
.sk_err
= new_smc
->clcsock
->sk
->sk_err
;
337 rc
= sock_error(&new_smc
->sk
);