/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * CLC (connection layer control) handshake over initial TCP socket to
 * prepare for RDMA traffic
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
13 #include <linux/if_ether.h>
14 #include <linux/sched/signal.h>
/* Wait for data on the tcp-socket, analyze received data
 * Returns:
 * 0 if success and it was not a decline that we received.
 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
 */
30 int smc_clc_wait_msg(struct smc_sock
*smc
, void *buf
, int buflen
,
33 struct sock
*clc_sk
= smc
->clcsock
->sk
;
34 struct smc_clc_msg_hdr
*clcm
= buf
;
35 struct msghdr msg
= {NULL
, 0};
	/* peek the first few bytes to determine length of data to receive
	 * so we don't consume any subsequent CLC message or payload data
	 * in the TCP byte stream
	 */
47 krflags
= MSG_PEEK
| MSG_WAITALL
;
48 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
49 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1,
50 sizeof(struct smc_clc_msg_hdr
), krflags
);
51 if (signal_pending(current
)) {
53 clc_sk
->sk_err
= EINTR
;
54 smc
->sk
.sk_err
= EINTR
;
58 reason_code
= -clc_sk
->sk_err
;
59 smc
->sk
.sk_err
= clc_sk
->sk_err
;
62 if (!len
) { /* peer has performed orderly shutdown */
63 smc
->sk
.sk_err
= ECONNRESET
;
64 reason_code
= -ECONNRESET
;
68 smc
->sk
.sk_err
= -len
;
72 datlen
= ntohs(clcm
->length
);
73 if ((len
< sizeof(struct smc_clc_msg_hdr
)) ||
74 (datlen
< sizeof(struct smc_clc_msg_decline
)) ||
75 (datlen
> sizeof(struct smc_clc_msg_accept_confirm
)) ||
76 memcmp(clcm
->eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
)) ||
77 ((clcm
->type
!= SMC_CLC_DECLINE
) &&
78 (clcm
->type
!= expected_type
))) {
79 smc
->sk
.sk_err
= EPROTO
;
80 reason_code
= -EPROTO
;
84 /* receive the complete CLC message */
87 memset(&msg
, 0, sizeof(struct msghdr
));
88 krflags
= MSG_WAITALL
;
89 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
90 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1, datlen
, krflags
);
92 smc
->sk
.sk_err
= EPROTO
;
93 reason_code
= -EPROTO
;
96 if (clcm
->type
== SMC_CLC_DECLINE
) {
97 reason_code
= SMC_CLC_DECL_REPLY
;
98 if (ntohl(((struct smc_clc_msg_decline
*)buf
)->peer_diagnosis
)
99 == SMC_CLC_DECL_SYNCERR
)
100 smc
->conn
.lgr
->sync_err
= true;
107 /* send CLC DECLINE message across internal TCP socket */
108 int smc_clc_send_decline(struct smc_sock
*smc
, u32 peer_diag_info
,
111 struct smc_clc_msg_decline dclc
;
116 memset(&dclc
, 0, sizeof(dclc
));
117 memcpy(dclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
118 dclc
.hdr
.type
= SMC_CLC_DECLINE
;
119 dclc
.hdr
.length
= htons(sizeof(struct smc_clc_msg_decline
));
120 dclc
.hdr
.version
= SMC_CLC_V1
;
121 dclc
.hdr
.flag
= out_of_sync
? 1 : 0;
122 memcpy(dclc
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
123 dclc
.peer_diagnosis
= htonl(peer_diag_info
);
124 memcpy(dclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
126 memset(&msg
, 0, sizeof(msg
));
127 vec
.iov_base
= &dclc
;
128 vec
.iov_len
= sizeof(struct smc_clc_msg_decline
);
129 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1,
130 sizeof(struct smc_clc_msg_decline
));
131 if (len
< sizeof(struct smc_clc_msg_decline
))
132 smc
->sk
.sk_err
= EPROTO
;
134 smc
->sk
.sk_err
= -len
;
138 /* send CLC PROPOSAL message across internal TCP socket */
139 int smc_clc_send_proposal(struct smc_sock
*smc
,
140 struct smc_ib_device
*smcibdev
,
143 struct smc_clc_msg_proposal pclc
;
149 /* send SMC Proposal CLC message */
150 memset(&pclc
, 0, sizeof(pclc
));
151 memcpy(pclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
152 pclc
.hdr
.type
= SMC_CLC_PROPOSAL
;
153 pclc
.hdr
.length
= htons(sizeof(pclc
));
154 pclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
155 memcpy(pclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
156 memcpy(&pclc
.lcl
.gid
, &smcibdev
->gid
[ibport
- 1], SMC_GID_SIZE
);
157 memcpy(&pclc
.lcl
.mac
, &smcibdev
->mac
[ibport
- 1], ETH_ALEN
);
159 /* determine subnet and mask from internal TCP socket */
160 rc
= smc_netinfo_by_tcpsk(smc
->clcsock
, &pclc
.outgoing_subnet
,
163 return SMC_CLC_DECL_CNFERR
; /* configuration error */
164 memcpy(pclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
165 memset(&msg
, 0, sizeof(msg
));
166 vec
.iov_base
= &pclc
;
167 vec
.iov_len
= sizeof(pclc
);
168 /* due to the few bytes needed for clc-handshake this cannot block */
169 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(pclc
));
170 if (len
< sizeof(pclc
)) {
172 reason_code
= -ENETUNREACH
;
173 smc
->sk
.sk_err
= -reason_code
;
175 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
176 reason_code
= -smc
->sk
.sk_err
;
183 /* send CLC CONFIRM message across internal TCP socket */
184 int smc_clc_send_confirm(struct smc_sock
*smc
)
186 struct smc_connection
*conn
= &smc
->conn
;
187 struct smc_clc_msg_accept_confirm cclc
;
188 struct smc_link
*link
;
194 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
195 /* send SMC Confirm CLC msg */
196 memset(&cclc
, 0, sizeof(cclc
));
197 memcpy(cclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
198 cclc
.hdr
.type
= SMC_CLC_CONFIRM
;
199 cclc
.hdr
.length
= htons(sizeof(cclc
));
200 cclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
201 memcpy(cclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
202 memcpy(&cclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
204 memcpy(&cclc
.lcl
.mac
, &link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
205 hton24(cclc
.qpn
, link
->roce_qp
->qp_num
);
207 htonl(conn
->rmb_desc
->rkey
[SMC_SINGLE_LINK
]);
208 cclc
.conn_idx
= 1; /* for now: 1 RMB = 1 RMBE */
209 cclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
210 cclc
.qp_mtu
= min(link
->path_mtu
, link
->peer_mtu
);
211 cclc
.rmbe_size
= conn
->rmbe_size_short
;
213 cpu_to_be64((u64
)conn
->rmb_desc
->dma_addr
[SMC_SINGLE_LINK
]);
214 hton24(cclc
.psn
, link
->psn_initial
);
216 memcpy(cclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
218 memset(&msg
, 0, sizeof(msg
));
219 vec
.iov_base
= &cclc
;
220 vec
.iov_len
= sizeof(cclc
);
221 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(cclc
));
222 if (len
< sizeof(cclc
)) {
224 reason_code
= -ENETUNREACH
;
225 smc
->sk
.sk_err
= -reason_code
;
227 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
228 reason_code
= -smc
->sk
.sk_err
;
234 /* send CLC ACCEPT message across internal TCP socket */
235 int smc_clc_send_accept(struct smc_sock
*new_smc
, int srv_first_contact
)
237 struct smc_connection
*conn
= &new_smc
->conn
;
238 struct smc_clc_msg_accept_confirm aclc
;
239 struct smc_link
*link
;
245 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
246 memset(&aclc
, 0, sizeof(aclc
));
247 memcpy(aclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
248 aclc
.hdr
.type
= SMC_CLC_ACCEPT
;
249 aclc
.hdr
.length
= htons(sizeof(aclc
));
250 aclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
251 if (srv_first_contact
)
253 memcpy(aclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
254 memcpy(&aclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
256 memcpy(&aclc
.lcl
.mac
, link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
257 hton24(aclc
.qpn
, link
->roce_qp
->qp_num
);
259 htonl(conn
->rmb_desc
->rkey
[SMC_SINGLE_LINK
]);
260 aclc
.conn_idx
= 1; /* as long as 1 RMB = 1 RMBE */
261 aclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
262 aclc
.qp_mtu
= link
->path_mtu
;
263 aclc
.rmbe_size
= conn
->rmbe_size_short
,
265 cpu_to_be64((u64
)conn
->rmb_desc
->dma_addr
[SMC_SINGLE_LINK
]);
266 hton24(aclc
.psn
, link
->psn_initial
);
267 memcpy(aclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
269 memset(&msg
, 0, sizeof(msg
));
270 vec
.iov_base
= &aclc
;
271 vec
.iov_len
= sizeof(aclc
);
272 len
= kernel_sendmsg(new_smc
->clcsock
, &msg
, &vec
, 1, sizeof(aclc
));
273 if (len
< sizeof(aclc
)) {
275 new_smc
->sk
.sk_err
= EPROTO
;
277 new_smc
->sk
.sk_err
= new_smc
->clcsock
->sk
->sk_err
;
278 rc
= sock_error(&new_smc
->sk
);