net/smc/smc_clc.c (Linux 4.18.10)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  CLC (connection layer control) handshake over initial TCP socket to
 *  prepare for RDMA traffic
 *
 *  Copyright IBM Corp. 2016, 2018
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
#include <linux/in.h>
#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>

#include <net/addrconf.h>
#include <net/sock.h>
#include <net/tcp.h>

#include "smc.h"
#include "smc_core.h"
#include "smc_clc.h"
#include "smc_ib.h"

/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
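
/* every CLC message carries this eyecatcher twice, once in the header and
 * once in the trailer; both copies are verified in smc_clc_msg_hdr_valid()
 */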

/* check if received message has a correct header length and contains valid
 * heading and trailing eyecatchers
 */
static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
{
        struct smc_clc_msg_proposal_prefix *pclc_prfx;
        struct smc_clc_msg_accept_confirm *clc;
        struct smc_clc_msg_proposal *pclc;
        struct smc_clc_msg_decline *dclc;
        struct smc_clc_msg_trail *trl;

        if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
                return false;
        switch (clcm->type) {
        case SMC_CLC_PROPOSAL:
                pclc = (struct smc_clc_msg_proposal *)clcm;
                pclc_prfx = smc_clc_proposal_get_prefix(pclc);
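                /* the expected length of a proposal depends on the
                 * iparea_offset gap and on how many IPv6 prefixes it carries
                 */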
                if (ntohs(pclc->hdr.length) !=
                        sizeof(*pclc) + ntohs(pclc->iparea_offset) +
                        sizeof(*pclc_prfx) +
                        pclc_prfx->ipv6_prefixes_cnt *
                                sizeof(struct smc_clc_ipv6_prefix) +
                        sizeof(*trl))
                        return false;
                trl = (struct smc_clc_msg_trail *)
                        ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
                break;
        case SMC_CLC_ACCEPT:
        case SMC_CLC_CONFIRM:
                clc = (struct smc_clc_msg_accept_confirm *)clcm;
                if (ntohs(clc->hdr.length) != sizeof(*clc))
                        return false;
                trl = &clc->trl;
                break;
        case SMC_CLC_DECLINE:
                dclc = (struct smc_clc_msg_decline *)clcm;
                if (ntohs(dclc->hdr.length) != sizeof(*dclc))
                        return false;
                trl = &dclc->trl;
                break;
        default:
                return false;
        }
        if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
                return false;
        return true;
}

/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
                                 struct smc_clc_msg_proposal_prefix *prop)
{
        struct in_device *in_dev = __in_dev_get_rcu(dst->dev);

        if (!in_dev)
                return -ENODEV;
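        /* walk the device's IPv4 address list; the caller holds the RCU
         * read lock required by __in_dev_get_rcu()
         */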
        for_ifa(in_dev) {
                if (!inet_ifa_match(ipv4, ifa))
                        continue;
                prop->prefix_len = inet_mask_len(ifa->ifa_mask);
                prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
                /* prop->ipv6_prefixes_cnt = 0; already done by memset before */
                return 0;
        } endfor_ifa(in_dev);
        return -ENOENT;
}

/* fill CLC proposal msg with ipv6 prefixes from device */
static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
                                 struct smc_clc_msg_proposal_prefix *prop,
                                 struct smc_clc_ipv6_prefix *ipv6_prfx)
{
#if IS_ENABLED(CONFIG_IPV6)
        struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
        struct inet6_ifaddr *ifa;
        int cnt = 0;

        if (!in6_dev)
                return -ENODEV;
        /* use a maximum of 8 IPv6 prefixes from device */
        list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
                if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
                        continue;
                ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
                                 &ifa->addr, ifa->prefix_len);
                ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
                cnt++;
                if (cnt == SMC_CLC_MAX_V6_PREFIX)
                        break;
        }
        prop->ipv6_prefixes_cnt = cnt;
        if (cnt)
                return 0;
#endif
        return -ENOENT;
}

/* retrieve and set prefixes in CLC proposal msg */
static int smc_clc_prfx_set(struct socket *clcsock,
                            struct smc_clc_msg_proposal_prefix *prop,
                            struct smc_clc_ipv6_prefix *ipv6_prfx)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        struct sockaddr_storage addrs;
        struct sockaddr_in6 *addr6;
        struct sockaddr_in *addr;
        int rc = -ENOENT;

        memset(prop, 0, sizeof(*prop));
        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }
        /* get address to which the internal TCP socket is bound */
        kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
        /* analyze IP specific data of net_device belonging to TCP socket */
        addr6 = (struct sockaddr_in6 *)&addrs;
        rcu_read_lock();
        if (addrs.ss_family == PF_INET) {
                /* IPv4 */
                addr = (struct sockaddr_in *)&addrs;
                rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
        } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
                /* mapped IPv4 address - peer is IPv4 only */
                rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
                                           prop);
        } else {
                /* IPv6 */
                rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
        }
        rcu_read_unlock();
out_rel:
        dst_release(dst);
out:
        return rc;
}

/* match ipv4 addrs of dev against addr in CLC proposal */
static int smc_clc_prfx_match4_rcu(struct net_device *dev,
                                   struct smc_clc_msg_proposal_prefix *prop)
{
        struct in_device *in_dev = __in_dev_get_rcu(dev);

        if (!in_dev)
                return -ENODEV;
        for_ifa(in_dev) {
                if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
                    inet_ifa_match(prop->outgoing_subnet, ifa))
                        return 0;
        } endfor_ifa(in_dev);

        return -ENOENT;
}

/* match ipv6 addrs of dev against addrs in CLC proposal */
static int smc_clc_prfx_match6_rcu(struct net_device *dev,
                                   struct smc_clc_msg_proposal_prefix *prop)
{
#if IS_ENABLED(CONFIG_IPV6)
        struct inet6_dev *in6_dev = __in6_dev_get(dev);
        struct smc_clc_ipv6_prefix *ipv6_prfx;
        struct inet6_ifaddr *ifa;
        int i, max;

        if (!in6_dev)
                return -ENODEV;
        /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
        ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
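        /* never look at more than SMC_CLC_MAX_V6_PREFIX received entries */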
        max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
        list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
                if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
                        continue;
                for (i = 0; i < max; i++) {
                        if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
                            ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
                                              ifa->prefix_len))
                                return 0;
                }
        }
#endif
        return -ENOENT;
}

/* check if proposed prefixes match one of our device prefixes */
int smc_clc_prfx_match(struct socket *clcsock,
                       struct smc_clc_msg_proposal_prefix *prop)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        int rc;

        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }
        rcu_read_lock();
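        /* a proposal without IPv6 prefixes describes an IPv4 subnet */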
        if (!prop->ipv6_prefixes_cnt)
                rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
        else
                rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
        rcu_read_unlock();
out_rel:
        dst_release(dst);
out:
        return rc;
}

/* Wait for data on the tcp-socket, analyze received data
 * Returns:
 * 0 if success and it was not a decline that we received.
 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
 */
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
                     u8 expected_type)
{
        long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
        struct sock *clc_sk = smc->clcsock->sk;
        struct smc_clc_msg_hdr *clcm = buf;
        struct msghdr msg = {NULL, 0};
        int reason_code = 0;
        struct kvec vec = {buf, buflen};
        int len, datlen;
        int krflags;

        /* peek the first few bytes to determine length of data to receive
         * so we don't consume any subsequent CLC message or payload data
         * in the TCP byte stream
         */
        /*
         * Caller must make sure that buflen is no less than
         * sizeof(struct smc_clc_msg_hdr)
         */
        krflags = MSG_PEEK | MSG_WAITALL;
        smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
        iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
                      sizeof(struct smc_clc_msg_hdr));
        len = sock_recvmsg(smc->clcsock, &msg, krflags);
        if (signal_pending(current)) {
                reason_code = -EINTR;
                clc_sk->sk_err = EINTR;
                smc->sk.sk_err = EINTR;
                goto out;
        }
        if (clc_sk->sk_err) {
                reason_code = -clc_sk->sk_err;
                smc->sk.sk_err = clc_sk->sk_err;
                goto out;
        }
        if (!len) { /* peer has performed orderly shutdown */
                smc->sk.sk_err = ECONNRESET;
                reason_code = -ECONNRESET;
                goto out;
        }
        if (len < 0) {
                smc->sk.sk_err = -len;
                reason_code = len;
                goto out;
        }
        datlen = ntohs(clcm->length);
        if ((len < sizeof(struct smc_clc_msg_hdr)) ||
            (datlen > buflen) ||
            ((clcm->type != SMC_CLC_DECLINE) &&
             (clcm->type != expected_type))) {
                smc->sk.sk_err = EPROTO;
                reason_code = -EPROTO;
                goto out;
        }

        /* receive the complete CLC message */
        memset(&msg, 0, sizeof(struct msghdr));
        iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
        krflags = MSG_WAITALL;
        len = sock_recvmsg(smc->clcsock, &msg, krflags);
        if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
                smc->sk.sk_err = EPROTO;
                reason_code = -EPROTO;
                goto out;
        }
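        /* a decline with hdr.flag set reports a synchronization error; the
         * link group is out of sync with the peer and must be terminated
         */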
        if (clcm->type == SMC_CLC_DECLINE) {
                reason_code = SMC_CLC_DECL_REPLY;
                if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
                        smc->conn.lgr->sync_err = 1;
                        smc_lgr_terminate(smc->conn.lgr);
                }
        }

out:
        smc->clcsock->sk->sk_rcvtimeo = rcvtimeo;
        return reason_code;
}

/* send CLC DECLINE message across internal TCP socket */
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
{
        struct smc_clc_msg_decline dclc;
        struct msghdr msg;
        struct kvec vec;
        int len;

        memset(&dclc, 0, sizeof(dclc));
        memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        dclc.hdr.type = SMC_CLC_DECLINE;
        dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
        dclc.hdr.version = SMC_CLC_V1;
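        /* the flag tells the peer that this decline reports a sync error;
         * the receiver then terminates its link group, see smc_clc_wait_msg()
         */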
        dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
        memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
        dclc.peer_diagnosis = htonl(peer_diag_info);
        memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

        memset(&msg, 0, sizeof(msg));
        vec.iov_base = &dclc;
        vec.iov_len = sizeof(struct smc_clc_msg_decline);
        len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
                             sizeof(struct smc_clc_msg_decline));
        if (len < sizeof(struct smc_clc_msg_decline))
                smc->sk.sk_err = EPROTO;
        if (len < 0)
                smc->sk.sk_err = -len;
        return sock_error(&smc->sk);
}

/* send CLC PROPOSAL message across internal TCP socket */
int smc_clc_send_proposal(struct smc_sock *smc,
                          struct smc_ib_device *smcibdev,
                          u8 ibport)
{
        struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
        struct smc_clc_msg_proposal_prefix pclc_prfx;
        struct smc_clc_msg_proposal pclc;
        struct smc_clc_msg_trail trl;
        int len, i, plen, rc;
        int reason_code = 0;
        struct kvec vec[4];
        struct msghdr msg;

        /* retrieve ip prefixes for CLC proposal msg */
        rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
        if (rc)
                return SMC_CLC_DECL_CNFERR; /* configuration error */

        /* send SMC Proposal CLC message */
        plen = sizeof(pclc) + sizeof(pclc_prfx) +
               (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
               sizeof(trl);
        memset(&pclc, 0, sizeof(pclc));
        memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        pclc.hdr.type = SMC_CLC_PROPOSAL;
        pclc.hdr.length = htons(plen);
        pclc.hdr.version = SMC_CLC_V1;          /* SMC version */
        memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
        memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
        memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
        pclc.iparea_offset = htons(0);

        memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
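        /* assemble the proposal from header, prefix descriptor, the optional
         * IPv6 prefix array and the trailing eyecatcher
         */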
        memset(&msg, 0, sizeof(msg));
        i = 0;
        vec[i].iov_base = &pclc;
        vec[i++].iov_len = sizeof(pclc);
        vec[i].iov_base = &pclc_prfx;
        vec[i++].iov_len = sizeof(pclc_prfx);
        if (pclc_prfx.ipv6_prefixes_cnt > 0) {
                vec[i].iov_base = &ipv6_prfx[0];
                vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
                                   sizeof(ipv6_prfx[0]);
        }
        vec[i].iov_base = &trl;
        vec[i++].iov_len = sizeof(trl);
        /* due to the few bytes needed for clc-handshake this cannot block */
        len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
        if (len < sizeof(pclc)) {
                if (len >= 0) {
                        reason_code = -ENETUNREACH;
                        smc->sk.sk_err = -reason_code;
                } else {
                        smc->sk.sk_err = smc->clcsock->sk->sk_err;
                        reason_code = -smc->sk.sk_err;
                }
        }

        return reason_code;
}

/* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc)
{
        struct smc_connection *conn = &smc->conn;
        struct smc_clc_msg_accept_confirm cclc;
        struct smc_link *link;
        int reason_code = 0;
        struct msghdr msg;
        struct kvec vec;
        int len;

        link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        /* send SMC Confirm CLC msg */
        memset(&cclc, 0, sizeof(cclc));
        memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        cclc.hdr.type = SMC_CLC_CONFIRM;
        cclc.hdr.length = htons(sizeof(cclc));
        cclc.hdr.version = SMC_CLC_V1;          /* SMC version */
        memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
        memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
               SMC_GID_SIZE);
        memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
        hton24(cclc.qpn, link->roce_qp->qp_num);
        cclc.rmb_rkey =
                htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
        cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
        cclc.rmbe_alert_token = htonl(conn->alert_token_local);
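        /* use the smaller of the local and the peer path MTU for the QP */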
        cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
        cclc.rmbe_size = conn->rmbe_size_short;
        cclc.rmb_dma_addr = cpu_to_be64(
                (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
        hton24(cclc.psn, link->psn_initial);

        memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

        memset(&msg, 0, sizeof(msg));
        vec.iov_base = &cclc;
        vec.iov_len = sizeof(cclc);
        len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
        if (len < sizeof(cclc)) {
                if (len >= 0) {
                        reason_code = -ENETUNREACH;
                        smc->sk.sk_err = -reason_code;
                } else {
                        smc->sk.sk_err = smc->clcsock->sk->sk_err;
                        reason_code = -smc->sk.sk_err;
                }
        }
        return reason_code;
}

/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
{
        struct smc_connection *conn = &new_smc->conn;
        struct smc_clc_msg_accept_confirm aclc;
        struct smc_link *link;
        struct msghdr msg;
        struct kvec vec;
        int rc = 0;
        int len;

        link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        memset(&aclc, 0, sizeof(aclc));
        memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        aclc.hdr.type = SMC_CLC_ACCEPT;
        aclc.hdr.length = htons(sizeof(aclc));
        aclc.hdr.version = SMC_CLC_V1;          /* SMC version */
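        /* hdr.flag in an accept signals "first contact": the server set up a
         * new link group for this connection
         */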
        if (srv_first_contact)
                aclc.hdr.flag = 1;
        memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
        memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
               SMC_GID_SIZE);
        memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
        hton24(aclc.qpn, link->roce_qp->qp_num);
        aclc.rmb_rkey =
                htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
        aclc.rmbe_idx = 1;                      /* as long as 1 RMB = 1 RMBE */
        aclc.rmbe_alert_token = htonl(conn->alert_token_local);
        aclc.qp_mtu = link->path_mtu;
        aclc.rmbe_size = conn->rmbe_size_short;
        aclc.rmb_dma_addr = cpu_to_be64(
                (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
        hton24(aclc.psn, link->psn_initial);
        memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

        memset(&msg, 0, sizeof(msg));
        vec.iov_base = &aclc;
        vec.iov_len = sizeof(aclc);
        len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
        if (len < sizeof(aclc)) {
                if (len >= 0)
                        new_smc->sk.sk_err = EPROTO;
                else
                        new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
                rc = sock_error(&new_smc->sk);
        }

        return rc;
}