// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/smc.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc_close.h"
#include "smc_netlink.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"

#define SMC_LGR_NUM_INCR	256
#define SMC_LGR_FREE_DELAY_SERV	(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT	(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
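/* Illustrative note (added, not in the original source): lgr_cnt and
 * lgrs_deleted let smc_smcr_terminate_all(), further below, wait until the
 * last SMC-R link group has actually been freed before device removal or
 * module unload completes.
 */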
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_down_work(struct work_struct *work);
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}
static void smc_ibdev_cnt_inc(struct smc_link *lnk)
{
	atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_ibdev_cnt_dec(struct smc_link *lnk)
{
	atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}
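/* Illustrative note (added, not in the original source): with the delay
 * constants defined above, an unused server link group is freed after
 * roughly 600 seconds, while a client link group waits an extra 10 seconds
 * (610 s total), so that the server side always goes away first.
 */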
/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
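/* Illustrative sketch (added; an assumption, not taken from this file): the
 * lookup counterpart, smc_lgr_find_conn() used further below, is expected to
 * walk the same ordering, roughly:
 *
 *	struct rb_node *node = lgr->conns_all.rb_node;
 *
 *	while (node) {
 *		cur = rb_entry(node, struct smc_connection, alert_node);
 *		if (cur->alert_token_local > token)
 *			node = node->rb_left;
 *		else if (cur->alert_token_local < token)
 *			node = node->rb_right;
 *		else
 *			return cur;
 *	}
 *	return NULL;
 */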
/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	conn->lnk = NULL;	/* reset conn->lnk first */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	atomic_inc(&conn->lnk->conn_cnt);
	return 0;
}
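/* Illustrative note (added, not in the original source): on the server side
 * the loop above alternates new connections between the first usable link
 * and the next one: with an even conns_num the first link in the expected
 * state is taken, with an odd conns_num the following usable link is
 * preferred, spreading connections roughly evenly across a symmetric group.
 */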
/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}
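/* Illustrative note (added, not in the original source): alert tokens come
 * from a single global counter, so after a 32-bit wrap the loop above simply
 * skips the reserved value 0 and retries any value that smc_lgr_find_conn()
 * still finds in this link group's tree.
 */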
/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	if (conn->lnk)
		atomic_dec(&conn->lnk->conn_cnt);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}
/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!smc_conn_lgr_valid(conn))
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
}
static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
				 bool is_rmb,
				 struct list_head *buf_list,
				 struct smc_buf_desc *buf_desc)
{
	list_add(&buf_desc->list, buf_list);
	if (is_rmb) {
		lgr->alloc_rmbs += buf_desc->len;
		lgr->alloc_rmbs +=
			lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
	} else {
		lgr->alloc_sndbufs += buf_desc->len;
	}
}

static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
				 bool is_rmb,
				 struct smc_buf_desc *buf_desc)
{
	list_del(&buf_desc->list);
	if (is_rmb) {
		lgr->alloc_rmbs -= buf_desc->len;
		lgr->alloc_rmbs -=
			lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
	} else {
		lgr->alloc_sndbufs -= buf_desc->len;
	}
}
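/* Illustrative note (added; an assumption): for SMC-D the extra
 * sizeof(struct smcd_cdc_msg) bytes are counted because the CDC header is
 * carved out of the registered DMB itself (see the "restore original buf
 * len" adjustment in smcd_buf_free() below), so the accounted RMB allocation
 * is slightly larger than buf_desc->len; SMC-R RMBs carry no such header.
 */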
253 int smc_nl_get_sys_info(struct sk_buff
*skb
, struct netlink_callback
*cb
)
255 struct smc_nl_dmp_ctx
*cb_ctx
= smc_nl_dmp_ctx(cb
);
256 char hostname
[SMC_MAX_HOSTNAME_LEN
+ 1];
257 char smc_seid
[SMC_MAX_EID_LEN
+ 1];
258 struct nlattr
*attrs
;
263 nlh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
264 &smc_gen_nl_family
, NLM_F_MULTI
,
265 SMC_NETLINK_GET_SYS_INFO
);
270 attrs
= nla_nest_start(skb
, SMC_GEN_SYS_INFO
);
273 if (nla_put_u8(skb
, SMC_NLA_SYS_VER
, SMC_V2
))
275 if (nla_put_u8(skb
, SMC_NLA_SYS_REL
, SMC_RELEASE
))
277 if (nla_put_u8(skb
, SMC_NLA_SYS_IS_ISM_V2
, smc_ism_is_v2_capable()))
279 if (nla_put_u8(skb
, SMC_NLA_SYS_IS_SMCR_V2
, true))
281 smc_clc_get_hostname(&host
);
283 memcpy(hostname
, host
, SMC_MAX_HOSTNAME_LEN
);
284 hostname
[SMC_MAX_HOSTNAME_LEN
] = 0;
285 if (nla_put_string(skb
, SMC_NLA_SYS_LOCAL_HOST
, hostname
))
288 if (smc_ism_is_v2_capable()) {
289 smc_ism_get_system_eid(&seid
);
290 memcpy(smc_seid
, seid
, SMC_MAX_EID_LEN
);
291 smc_seid
[SMC_MAX_EID_LEN
] = 0;
292 if (nla_put_string(skb
, SMC_NLA_SYS_SEID
, smc_seid
))
295 nla_nest_end(skb
, attrs
);
296 genlmsg_end(skb
, nlh
);
301 nla_nest_cancel(skb
, attrs
);
303 genlmsg_cancel(skb
, nlh
);
308 /* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
309 static int smc_nl_fill_lgr_v2_common(struct smc_link_group
*lgr
,
311 struct netlink_callback
*cb
,
312 struct nlattr
*v2_attrs
)
314 char smc_host
[SMC_MAX_HOSTNAME_LEN
+ 1];
315 char smc_eid
[SMC_MAX_EID_LEN
+ 1];
317 if (nla_put_u8(skb
, SMC_NLA_LGR_V2_VER
, lgr
->smc_version
))
319 if (nla_put_u8(skb
, SMC_NLA_LGR_V2_REL
, lgr
->peer_smc_release
))
321 if (nla_put_u8(skb
, SMC_NLA_LGR_V2_OS
, lgr
->peer_os
))
323 memcpy(smc_host
, lgr
->peer_hostname
, SMC_MAX_HOSTNAME_LEN
);
324 smc_host
[SMC_MAX_HOSTNAME_LEN
] = 0;
325 if (nla_put_string(skb
, SMC_NLA_LGR_V2_PEER_HOST
, smc_host
))
327 memcpy(smc_eid
, lgr
->negotiated_eid
, SMC_MAX_EID_LEN
);
328 smc_eid
[SMC_MAX_EID_LEN
] = 0;
329 if (nla_put_string(skb
, SMC_NLA_LGR_V2_NEG_EID
, smc_eid
))
332 nla_nest_end(skb
, v2_attrs
);
336 nla_nest_cancel(skb
, v2_attrs
);
340 static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group
*lgr
,
342 struct netlink_callback
*cb
)
344 struct nlattr
*v2_attrs
;
346 v2_attrs
= nla_nest_start(skb
, SMC_NLA_LGR_R_V2
);
349 if (nla_put_u8(skb
, SMC_NLA_LGR_R_V2_DIRECT
, !lgr
->uses_gateway
))
351 if (nla_put_u8(skb
, SMC_NLA_LGR_R_V2_MAX_CONNS
, lgr
->max_conns
))
353 if (nla_put_u8(skb
, SMC_NLA_LGR_R_V2_MAX_LINKS
, lgr
->max_links
))
356 nla_nest_end(skb
, v2_attrs
);
360 nla_nest_cancel(skb
, v2_attrs
);
365 static int smc_nl_fill_lgr(struct smc_link_group
*lgr
,
367 struct netlink_callback
*cb
)
369 char smc_target
[SMC_MAX_PNETID_LEN
+ 1];
370 struct nlattr
*attrs
, *v2_attrs
;
372 attrs
= nla_nest_start(skb
, SMC_GEN_LGR_SMCR
);
376 if (nla_put_u32(skb
, SMC_NLA_LGR_R_ID
, *((u32
*)&lgr
->id
)))
378 if (nla_put_u32(skb
, SMC_NLA_LGR_R_CONNS_NUM
, lgr
->conns_num
))
380 if (nla_put_u8(skb
, SMC_NLA_LGR_R_ROLE
, lgr
->role
))
382 if (nla_put_u8(skb
, SMC_NLA_LGR_R_TYPE
, lgr
->type
))
384 if (nla_put_u8(skb
, SMC_NLA_LGR_R_BUF_TYPE
, lgr
->buf_type
))
386 if (nla_put_u8(skb
, SMC_NLA_LGR_R_VLAN_ID
, lgr
->vlan_id
))
388 if (nla_put_u64_64bit(skb
, SMC_NLA_LGR_R_NET_COOKIE
,
389 lgr
->net
->net_cookie
, SMC_NLA_LGR_R_PAD
))
391 memcpy(smc_target
, lgr
->pnet_id
, SMC_MAX_PNETID_LEN
);
392 smc_target
[SMC_MAX_PNETID_LEN
] = 0;
393 if (nla_put_string(skb
, SMC_NLA_LGR_R_PNETID
, smc_target
))
395 if (nla_put_uint(skb
, SMC_NLA_LGR_R_SNDBUF_ALLOC
, lgr
->alloc_sndbufs
))
397 if (nla_put_uint(skb
, SMC_NLA_LGR_R_RMB_ALLOC
, lgr
->alloc_rmbs
))
399 if (lgr
->smc_version
> SMC_V1
) {
400 v2_attrs
= nla_nest_start(skb
, SMC_NLA_LGR_R_V2_COMMON
);
403 if (smc_nl_fill_lgr_v2_common(lgr
, skb
, cb
, v2_attrs
))
405 if (smc_nl_fill_smcr_lgr_v2(lgr
, skb
, cb
))
409 nla_nest_end(skb
, attrs
);
412 nla_nest_cancel(skb
, attrs
);
417 static int smc_nl_fill_lgr_link(struct smc_link_group
*lgr
,
418 struct smc_link
*link
,
420 struct netlink_callback
*cb
)
422 char smc_ibname
[IB_DEVICE_NAME_MAX
];
423 u8 smc_gid_target
[41];
424 struct nlattr
*attrs
;
428 nlh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
429 &smc_gen_nl_family
, NLM_F_MULTI
,
430 SMC_NETLINK_GET_LINK_SMCR
);
434 attrs
= nla_nest_start(skb
, SMC_GEN_LINK_SMCR
);
438 if (nla_put_u8(skb
, SMC_NLA_LINK_ID
, link
->link_id
))
440 if (nla_put_u32(skb
, SMC_NLA_LINK_STATE
, link
->state
))
442 if (nla_put_u32(skb
, SMC_NLA_LINK_CONN_CNT
,
443 atomic_read(&link
->conn_cnt
)))
445 if (nla_put_u8(skb
, SMC_NLA_LINK_IB_PORT
, link
->ibport
))
447 if (nla_put_u32(skb
, SMC_NLA_LINK_NET_DEV
, link
->ndev_ifidx
))
449 snprintf(smc_ibname
, sizeof(smc_ibname
), "%s", link
->ibname
);
450 if (nla_put_string(skb
, SMC_NLA_LINK_IB_DEV
, smc_ibname
))
452 memcpy(&link_uid
, link
->link_uid
, sizeof(link_uid
));
453 if (nla_put_u32(skb
, SMC_NLA_LINK_UID
, link_uid
))
455 memcpy(&link_uid
, link
->peer_link_uid
, sizeof(link_uid
));
456 if (nla_put_u32(skb
, SMC_NLA_LINK_PEER_UID
, link_uid
))
458 memset(smc_gid_target
, 0, sizeof(smc_gid_target
));
459 smc_gid_be16_convert(smc_gid_target
, link
->gid
);
460 if (nla_put_string(skb
, SMC_NLA_LINK_GID
, smc_gid_target
))
462 memset(smc_gid_target
, 0, sizeof(smc_gid_target
));
463 smc_gid_be16_convert(smc_gid_target
, link
->peer_gid
);
464 if (nla_put_string(skb
, SMC_NLA_LINK_PEER_GID
, smc_gid_target
))
467 nla_nest_end(skb
, attrs
);
468 genlmsg_end(skb
, nlh
);
471 nla_nest_cancel(skb
, attrs
);
473 genlmsg_cancel(skb
, nlh
);
478 static int smc_nl_handle_lgr(struct smc_link_group
*lgr
,
480 struct netlink_callback
*cb
,
486 nlh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
487 &smc_gen_nl_family
, NLM_F_MULTI
,
488 SMC_NETLINK_GET_LGR_SMCR
);
491 if (smc_nl_fill_lgr(lgr
, skb
, cb
))
494 genlmsg_end(skb
, nlh
);
497 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
498 if (!smc_link_usable(&lgr
->lnk
[i
]))
500 if (smc_nl_fill_lgr_link(lgr
, &lgr
->lnk
[i
], skb
, cb
))
507 genlmsg_cancel(skb
, nlh
);
512 static void smc_nl_fill_lgr_list(struct smc_lgr_list
*smc_lgr
,
514 struct netlink_callback
*cb
,
517 struct smc_nl_dmp_ctx
*cb_ctx
= smc_nl_dmp_ctx(cb
);
518 struct smc_link_group
*lgr
;
519 int snum
= cb_ctx
->pos
[0];
522 spin_lock_bh(&smc_lgr
->lock
);
523 list_for_each_entry(lgr
, &smc_lgr
->list
, list
) {
526 if (smc_nl_handle_lgr(lgr
, skb
, cb
, list_links
))
532 spin_unlock_bh(&smc_lgr
->lock
);
533 cb_ctx
->pos
[0] = num
;
536 static int smc_nl_fill_smcd_lgr(struct smc_link_group
*lgr
,
538 struct netlink_callback
*cb
)
540 char smc_pnet
[SMC_MAX_PNETID_LEN
+ 1];
541 struct smcd_dev
*smcd
= lgr
->smcd
;
542 struct smcd_gid smcd_gid
;
543 struct nlattr
*attrs
;
546 nlh
= genlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
547 &smc_gen_nl_family
, NLM_F_MULTI
,
548 SMC_NETLINK_GET_LGR_SMCD
);
552 attrs
= nla_nest_start(skb
, SMC_GEN_LGR_SMCD
);
556 if (nla_put_u32(skb
, SMC_NLA_LGR_D_ID
, *((u32
*)&lgr
->id
)))
558 smcd
->ops
->get_local_gid(smcd
, &smcd_gid
);
559 if (nla_put_u64_64bit(skb
, SMC_NLA_LGR_D_GID
,
560 smcd_gid
.gid
, SMC_NLA_LGR_D_PAD
))
562 if (nla_put_u64_64bit(skb
, SMC_NLA_LGR_D_EXT_GID
,
563 smcd_gid
.gid_ext
, SMC_NLA_LGR_D_PAD
))
565 if (nla_put_u64_64bit(skb
, SMC_NLA_LGR_D_PEER_GID
, lgr
->peer_gid
.gid
,
568 if (nla_put_u64_64bit(skb
, SMC_NLA_LGR_D_PEER_EXT_GID
,
569 lgr
->peer_gid
.gid_ext
, SMC_NLA_LGR_D_PAD
))
571 if (nla_put_u8(skb
, SMC_NLA_LGR_D_VLAN_ID
, lgr
->vlan_id
))
573 if (nla_put_u32(skb
, SMC_NLA_LGR_D_CONNS_NUM
, lgr
->conns_num
))
575 if (nla_put_u32(skb
, SMC_NLA_LGR_D_CHID
, smc_ism_get_chid(lgr
->smcd
)))
577 if (nla_put_uint(skb
, SMC_NLA_LGR_D_SNDBUF_ALLOC
, lgr
->alloc_sndbufs
))
579 if (nla_put_uint(skb
, SMC_NLA_LGR_D_DMB_ALLOC
, lgr
->alloc_rmbs
))
581 memcpy(smc_pnet
, lgr
->smcd
->pnetid
, SMC_MAX_PNETID_LEN
);
582 smc_pnet
[SMC_MAX_PNETID_LEN
] = 0;
583 if (nla_put_string(skb
, SMC_NLA_LGR_D_PNETID
, smc_pnet
))
585 if (lgr
->smc_version
> SMC_V1
) {
586 struct nlattr
*v2_attrs
;
588 v2_attrs
= nla_nest_start(skb
, SMC_NLA_LGR_D_V2_COMMON
);
591 if (smc_nl_fill_lgr_v2_common(lgr
, skb
, cb
, v2_attrs
))
594 nla_nest_end(skb
, attrs
);
595 genlmsg_end(skb
, nlh
);
599 nla_nest_cancel(skb
, attrs
);
601 genlmsg_cancel(skb
, nlh
);
606 static int smc_nl_handle_smcd_lgr(struct smcd_dev
*dev
,
608 struct netlink_callback
*cb
)
610 struct smc_nl_dmp_ctx
*cb_ctx
= smc_nl_dmp_ctx(cb
);
611 struct smc_link_group
*lgr
;
612 int snum
= cb_ctx
->pos
[1];
615 spin_lock_bh(&dev
->lgr_lock
);
616 list_for_each_entry(lgr
, &dev
->lgr_list
, list
) {
621 rc
= smc_nl_fill_smcd_lgr(lgr
, skb
, cb
);
628 spin_unlock_bh(&dev
->lgr_lock
);
629 cb_ctx
->pos
[1] = num
;
633 static int smc_nl_fill_smcd_dev(struct smcd_dev_list
*dev_list
,
635 struct netlink_callback
*cb
)
637 struct smc_nl_dmp_ctx
*cb_ctx
= smc_nl_dmp_ctx(cb
);
638 struct smcd_dev
*smcd_dev
;
639 int snum
= cb_ctx
->pos
[0];
642 mutex_lock(&dev_list
->mutex
);
643 list_for_each_entry(smcd_dev
, &dev_list
->list
, list
) {
644 if (list_empty(&smcd_dev
->lgr_list
))
648 rc
= smc_nl_handle_smcd_lgr(smcd_dev
, skb
, cb
);
655 mutex_unlock(&dev_list
->mutex
);
656 cb_ctx
->pos
[0] = num
;
660 int smcr_nl_get_lgr(struct sk_buff
*skb
, struct netlink_callback
*cb
)
662 bool list_links
= false;
664 smc_nl_fill_lgr_list(&smc_lgr_list
, skb
, cb
, list_links
);
668 int smcr_nl_get_link(struct sk_buff
*skb
, struct netlink_callback
*cb
)
670 bool list_links
= true;
672 smc_nl_fill_lgr_list(&smc_lgr_list
, skb
, cb
, list_links
);
676 int smcd_nl_get_lgr(struct sk_buff
*skb
, struct netlink_callback
*cb
)
678 smc_nl_fill_smcd_dev(&smcd_dev_list
, skb
, cb
);
void smc_lgr_cleanup_early(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(lgr_lock);
	__smc_lgr_terminate(lgr, true);
}
static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_sendable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_all(&lgr->llc_msg_waiter);
	wake_up_all(&lgr->llc_flow_waiter);
}
static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}
static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}
/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
again:
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				goto again;
		}
		break;
	}
	return link_id;
}
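/* Illustrative note (added, not in the original source): link_id is a u8, so
 * after id 255 the counter wraps; the check above skips the reserved value 0
 * and the inner loop retries until an id is found that no usable link of
 * this link group is currently using.
 */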
static void smcr_copy_dev_info_to_link(struct smc_link *link)
{
	struct smc_ib_device *smcibdev = link->smcibdev;

	snprintf(link->ibname, sizeof(link->ibname), "%s",
		 smcibdev->ibdev->name);
	link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
}
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	struct smc_ib_device *smcibdev;
	u8 rndvec[3];
	int rc;

	if (lgr->smc_version == SMC_V2) {
		lnk->smcibdev = ini->smcrv2.ib_dev_v2;
		lnk->ibport = ini->smcrv2.ib_port_v2;
	} else {
		lnk->smcibdev = ini->ib_dev;
		lnk->ibport = ini->ib_port;
	}
	get_device(&lnk->smcibdev->ibdev->dev);
	atomic_inc(&lnk->smcibdev->lnk_cnt);
	refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
	lnk->clearing = 0;
	lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
	lnk->link_idx = link_idx;
	lnk->wr_rx_id_compl = 0;
	smc_ibdev_cnt_inc(lnk);
	smcr_copy_dev_info_to_link(lnk);
	atomic_set(&lnk->conn_cnt, 0);
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!lnk->smcibdev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index,
				  lgr->smc_version == SMC_V2 ?
						  &ini->smcrv2 : NULL);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	lnk->state = SMC_LNK_ACTIVATING;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	smc_ibdev_cnt_dec(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
	smc_lgr_put(lgr); /* lgr_hold above */
	return rc;
}
869 /* create a new SMC link group */
870 static int smc_lgr_create(struct smc_sock
*smc
, struct smc_init_info
*ini
)
872 struct smc_link_group
*lgr
;
873 struct list_head
*lgr_list
;
874 struct smcd_dev
*smcd
;
875 struct smc_link
*lnk
;
876 spinlock_t
*lgr_lock
;
881 if (ini
->is_smcd
&& ini
->vlan_id
) {
882 if (smc_ism_get_vlan(ini
->ism_dev
[ini
->ism_selected
],
884 rc
= SMC_CLC_DECL_ISMVLANERR
;
889 lgr
= kzalloc(sizeof(*lgr
), GFP_KERNEL
);
891 rc
= SMC_CLC_DECL_MEM
;
894 lgr
->tx_wq
= alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
895 SMC_LGR_ID_SIZE
, &lgr
->id
);
900 lgr
->is_smcd
= ini
->is_smcd
;
902 lgr
->terminating
= 0;
904 lgr
->vlan_id
= ini
->vlan_id
;
905 refcount_set(&lgr
->refcnt
, 1); /* set lgr refcnt to 1 */
906 init_rwsem(&lgr
->sndbufs_lock
);
907 init_rwsem(&lgr
->rmbs_lock
);
908 rwlock_init(&lgr
->conns_lock
);
909 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
910 INIT_LIST_HEAD(&lgr
->sndbufs
[i
]);
911 INIT_LIST_HEAD(&lgr
->rmbs
[i
]);
913 lgr
->next_link_id
= 0;
914 smc_lgr_list
.num
+= SMC_LGR_NUM_INCR
;
915 memcpy(&lgr
->id
, (u8
*)&smc_lgr_list
.num
, SMC_LGR_ID_SIZE
);
916 INIT_DELAYED_WORK(&lgr
->free_work
, smc_lgr_free_work
);
917 INIT_WORK(&lgr
->terminate_work
, smc_lgr_terminate_work
);
918 lgr
->conns_all
= RB_ROOT
;
920 /* SMC-D specific settings */
921 smcd
= ini
->ism_dev
[ini
->ism_selected
];
922 get_device(smcd
->ops
->get_dev(smcd
));
924 ini
->ism_peer_gid
[ini
->ism_selected
].gid
;
925 lgr
->peer_gid
.gid_ext
=
926 ini
->ism_peer_gid
[ini
->ism_selected
].gid_ext
;
927 lgr
->smcd
= ini
->ism_dev
[ini
->ism_selected
];
928 lgr_list
= &ini
->ism_dev
[ini
->ism_selected
]->lgr_list
;
929 lgr_lock
= &lgr
->smcd
->lgr_lock
;
930 lgr
->smc_version
= ini
->smcd_version
;
931 lgr
->peer_shutdown
= 0;
932 atomic_inc(&ini
->ism_dev
[ini
->ism_selected
]->lgr_cnt
);
934 /* SMC-R specific settings */
935 struct smc_ib_device
*ibdev
;
938 lgr
->role
= smc
->listen_smc
? SMC_SERV
: SMC_CLNT
;
939 lgr
->smc_version
= ini
->smcr_version
;
940 memcpy(lgr
->peer_systemid
, ini
->peer_systemid
,
942 if (lgr
->smc_version
== SMC_V2
) {
943 ibdev
= ini
->smcrv2
.ib_dev_v2
;
944 ibport
= ini
->smcrv2
.ib_port_v2
;
945 lgr
->saddr
= ini
->smcrv2
.saddr
;
946 lgr
->uses_gateway
= ini
->smcrv2
.uses_gateway
;
947 memcpy(lgr
->nexthop_mac
, ini
->smcrv2
.nexthop_mac
,
949 lgr
->max_conns
= ini
->max_conns
;
950 lgr
->max_links
= ini
->max_links
;
953 ibport
= ini
->ib_port
;
954 lgr
->max_conns
= SMC_CONN_PER_LGR_MAX
;
955 lgr
->max_links
= SMC_LINKS_ADD_LNK_MAX
;
957 memcpy(lgr
->pnet_id
, ibdev
->pnetid
[ibport
- 1],
959 rc
= smc_wr_alloc_lgr_mem(lgr
);
962 smc_llc_lgr_init(lgr
, smc
);
964 link_idx
= SMC_SINGLE_LINK
;
965 lnk
= &lgr
->lnk
[link_idx
];
966 rc
= smcr_link_init(lgr
, lnk
, link_idx
, ini
);
968 smc_wr_free_lgr_mem(lgr
);
971 lgr
->net
= smc_ib_net(lnk
->smcibdev
);
972 lgr_list
= &smc_lgr_list
.list
;
973 lgr_lock
= &smc_lgr_list
.lock
;
974 lgr
->buf_type
= lgr
->net
->smc
.sysctl_smcr_buf_type
;
975 atomic_inc(&lgr_cnt
);
978 spin_lock_bh(lgr_lock
);
979 list_add_tail(&lgr
->list
, lgr_list
);
980 spin_unlock_bh(lgr_lock
);
984 destroy_workqueue(lgr
->tx_wq
);
988 if (ini
->is_smcd
&& ini
->vlan_id
)
989 smc_ism_put_vlan(ini
->ism_dev
[ini
->ism_selected
], ini
->vlan_id
);
993 rc
= SMC_CLC_DECL_MEM
;
995 rc
= SMC_CLC_DECL_INTERR
;
1000 static int smc_write_space(struct smc_connection
*conn
)
1002 int buffer_len
= conn
->peer_rmbe_size
;
1003 union smc_host_cursor prod
;
1004 union smc_host_cursor cons
;
1007 smc_curs_copy(&prod
, &conn
->local_tx_ctrl
.prod
, conn
);
1008 smc_curs_copy(&cons
, &conn
->local_rx_ctrl
.cons
, conn
);
1009 /* determine rx_buf space */
1010 space
= buffer_len
- smc_curs_diff(buffer_len
, &cons
, &prod
);
1014 static int smc_switch_cursor(struct smc_sock
*smc
, struct smc_cdc_tx_pend
*pend
,
1015 struct smc_wr_buf
*wr_buf
)
1017 struct smc_connection
*conn
= &smc
->conn
;
1018 union smc_host_cursor cons
, fin
;
1022 smc_curs_copy(&conn
->tx_curs_sent
, &conn
->tx_curs_fin
, conn
);
1023 smc_curs_copy(&fin
, &conn
->local_tx_ctrl_fin
, conn
);
1024 /* set prod cursor to old state, enforce tx_rdma_writes() */
1025 smc_curs_copy(&conn
->local_tx_ctrl
.prod
, &fin
, conn
);
1026 smc_curs_copy(&cons
, &conn
->local_rx_ctrl
.cons
, conn
);
1028 if (smc_curs_comp(conn
->peer_rmbe_size
, &cons
, &fin
) < 0) {
1029 /* cons cursor advanced more than fin, and prod was set
1030 * fin above, so now prod is smaller than cons. Fix that.
1032 diff
= smc_curs_diff(conn
->peer_rmbe_size
, &fin
, &cons
);
1033 smc_curs_add(conn
->sndbuf_desc
->len
,
1034 &conn
->tx_curs_sent
, diff
);
1035 smc_curs_add(conn
->sndbuf_desc
->len
,
1036 &conn
->tx_curs_fin
, diff
);
1038 smp_mb__before_atomic();
1039 atomic_add(diff
, &conn
->sndbuf_space
);
1040 smp_mb__after_atomic();
1042 smc_curs_add(conn
->peer_rmbe_size
,
1043 &conn
->local_tx_ctrl
.prod
, diff
);
1044 smc_curs_add(conn
->peer_rmbe_size
,
1045 &conn
->local_tx_ctrl_fin
, diff
);
1047 /* recalculate, value is used by tx_rdma_writes() */
1048 atomic_set(&smc
->conn
.peer_rmbe_space
, smc_write_space(conn
));
1050 if (smc
->sk
.sk_state
!= SMC_INIT
&&
1051 smc
->sk
.sk_state
!= SMC_CLOSED
) {
1052 rc
= smcr_cdc_msg_send_validation(conn
, pend
, wr_buf
);
1054 queue_delayed_work(conn
->lgr
->tx_wq
, &conn
->tx_work
, 0);
1055 smc
->sk
.sk_data_ready(&smc
->sk
);
1058 smc_wr_tx_put_slot(conn
->lnk
,
1059 (struct smc_wr_tx_pend_priv
*)pend
);
1064 void smc_switch_link_and_count(struct smc_connection
*conn
,
1065 struct smc_link
*to_lnk
)
1067 atomic_dec(&conn
->lnk
->conn_cnt
);
1068 /* link_hold in smc_conn_create() */
1069 smcr_link_put(conn
->lnk
);
1071 atomic_inc(&conn
->lnk
->conn_cnt
);
1072 /* link_put in smc_conn_free() */
1073 smcr_link_hold(conn
->lnk
);
1076 struct smc_link
*smc_switch_conns(struct smc_link_group
*lgr
,
1077 struct smc_link
*from_lnk
, bool is_dev_err
)
1079 struct smc_link
*to_lnk
= NULL
;
1080 struct smc_cdc_tx_pend
*pend
;
1081 struct smc_connection
*conn
;
1082 struct smc_wr_buf
*wr_buf
;
1083 struct smc_sock
*smc
;
1084 struct rb_node
*node
;
1087 /* link is inactive, wake up tx waiters */
1088 smc_wr_wakeup_tx_wait(from_lnk
);
1090 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1091 if (!smc_link_active(&lgr
->lnk
[i
]) || i
== from_lnk
->link_idx
)
1093 if (is_dev_err
&& from_lnk
->smcibdev
== lgr
->lnk
[i
].smcibdev
&&
1094 from_lnk
->ibport
== lgr
->lnk
[i
].ibport
) {
1097 to_lnk
= &lgr
->lnk
[i
];
1100 if (!to_lnk
|| !smc_wr_tx_link_hold(to_lnk
)) {
1101 smc_lgr_terminate_sched(lgr
);
1105 read_lock_bh(&lgr
->conns_lock
);
1106 for (node
= rb_first(&lgr
->conns_all
); node
; node
= rb_next(node
)) {
1107 conn
= rb_entry(node
, struct smc_connection
, alert_node
);
1108 if (conn
->lnk
!= from_lnk
)
1110 smc
= container_of(conn
, struct smc_sock
, conn
);
1111 /* conn->lnk not yet set in SMC_INIT state */
1112 if (smc
->sk
.sk_state
== SMC_INIT
)
1114 if (smc
->sk
.sk_state
== SMC_CLOSED
||
1115 smc
->sk
.sk_state
== SMC_PEERCLOSEWAIT1
||
1116 smc
->sk
.sk_state
== SMC_PEERCLOSEWAIT2
||
1117 smc
->sk
.sk_state
== SMC_APPFINCLOSEWAIT
||
1118 smc
->sk
.sk_state
== SMC_APPCLOSEWAIT1
||
1119 smc
->sk
.sk_state
== SMC_APPCLOSEWAIT2
||
1120 smc
->sk
.sk_state
== SMC_PEERFINCLOSEWAIT
||
1121 smc
->sk
.sk_state
== SMC_PEERABORTWAIT
||
1122 smc
->sk
.sk_state
== SMC_PROCESSABORT
) {
1123 spin_lock_bh(&conn
->send_lock
);
1124 smc_switch_link_and_count(conn
, to_lnk
);
1125 spin_unlock_bh(&conn
->send_lock
);
1128 sock_hold(&smc
->sk
);
1129 read_unlock_bh(&lgr
->conns_lock
);
1130 /* pre-fetch buffer outside of send_lock, might sleep */
1131 rc
= smc_cdc_get_free_slot(conn
, to_lnk
, &wr_buf
, NULL
, &pend
);
1134 /* avoid race with smcr_tx_sndbuf_nonempty() */
1135 spin_lock_bh(&conn
->send_lock
);
1136 smc_switch_link_and_count(conn
, to_lnk
);
1137 rc
= smc_switch_cursor(smc
, pend
, wr_buf
);
1138 spin_unlock_bh(&conn
->send_lock
);
1144 read_unlock_bh(&lgr
->conns_lock
);
1145 smc_wr_tx_link_put(to_lnk
);
1149 smcr_link_down_cond_sched(to_lnk
);
1150 smc_wr_tx_link_put(to_lnk
);
1154 static void smcr_buf_unuse(struct smc_buf_desc
*buf_desc
, bool is_rmb
,
1155 struct smc_link_group
*lgr
)
1157 struct rw_semaphore
*lock
; /* lock buffer list */
1160 if (is_rmb
&& buf_desc
->is_conf_rkey
&& !list_empty(&lgr
->list
)) {
1161 /* unregister rmb with peer */
1162 rc
= smc_llc_flow_initiate(lgr
, SMC_LLC_FLOW_RKEY
);
1164 /* protect against smc_llc_cli_rkey_exchange() */
1165 down_read(&lgr
->llc_conf_mutex
);
1166 smc_llc_do_delete_rkey(lgr
, buf_desc
);
1167 buf_desc
->is_conf_rkey
= false;
1168 up_read(&lgr
->llc_conf_mutex
);
1169 smc_llc_flow_stop(lgr
, &lgr
->llc_flow_lcl
);
1173 if (buf_desc
->is_reg_err
) {
1174 /* buf registration failed, reuse not possible */
1175 lock
= is_rmb
? &lgr
->rmbs_lock
:
1178 smc_lgr_buf_list_del(lgr
, is_rmb
, buf_desc
);
1181 smc_buf_free(lgr
, is_rmb
, buf_desc
);
1183 /* memzero_explicit provides potential memory barrier semantics */
1184 memzero_explicit(buf_desc
->cpu_addr
, buf_desc
->len
);
1185 WRITE_ONCE(buf_desc
->used
, 0);
1189 static void smcd_buf_detach(struct smc_connection
*conn
)
1191 struct smcd_dev
*smcd
= conn
->lgr
->smcd
;
1192 u64 peer_token
= conn
->peer_token
;
1194 if (!conn
->sndbuf_desc
)
1197 smc_ism_detach_dmb(smcd
, peer_token
);
1199 kfree(conn
->sndbuf_desc
);
1200 conn
->sndbuf_desc
= NULL
;
1203 static void smc_buf_unuse(struct smc_connection
*conn
,
1204 struct smc_link_group
*lgr
)
1206 struct smc_sock
*smc
= container_of(conn
, struct smc_sock
, conn
);
1207 bool is_smcd
= lgr
->is_smcd
;
1210 if (conn
->sndbuf_desc
) {
1211 bufsize
= conn
->sndbuf_desc
->len
;
1212 if (!is_smcd
&& conn
->sndbuf_desc
->is_vm
) {
1213 smcr_buf_unuse(conn
->sndbuf_desc
, false, lgr
);
1215 memzero_explicit(conn
->sndbuf_desc
->cpu_addr
, bufsize
);
1216 WRITE_ONCE(conn
->sndbuf_desc
->used
, 0);
1218 SMC_STAT_RMB_SIZE(smc
, is_smcd
, false, false, bufsize
);
1220 if (conn
->rmb_desc
) {
1221 bufsize
= conn
->rmb_desc
->len
;
1223 smcr_buf_unuse(conn
->rmb_desc
, true, lgr
);
1225 bufsize
+= sizeof(struct smcd_cdc_msg
);
1226 memzero_explicit(conn
->rmb_desc
->cpu_addr
, bufsize
);
1227 WRITE_ONCE(conn
->rmb_desc
->used
, 0);
1229 SMC_STAT_RMB_SIZE(smc
, is_smcd
, true, false, bufsize
);
1233 /* remove a finished connection from its link group */
1234 void smc_conn_free(struct smc_connection
*conn
)
1236 struct smc_link_group
*lgr
= conn
->lgr
;
1238 if (!lgr
|| conn
->freed
)
1239 /* Connection has never been registered in a
1240 * link group, or has already been freed.
1245 if (!smc_conn_lgr_valid(conn
))
1246 /* Connection has already unregistered from
1252 if (!list_empty(&lgr
->list
))
1253 smc_ism_unset_conn(conn
);
1254 if (smc_ism_support_dmb_nocopy(lgr
->smcd
))
1255 smcd_buf_detach(conn
);
1256 tasklet_kill(&conn
->rx_tsklet
);
1258 smc_cdc_wait_pend_tx_wr(conn
);
1259 if (current_work() != &conn
->abort_work
)
1260 cancel_work_sync(&conn
->abort_work
);
1262 if (!list_empty(&lgr
->list
)) {
1263 smc_buf_unuse(conn
, lgr
); /* allow buffer reuse */
1264 smc_lgr_unregister_conn(conn
);
1267 if (!lgr
->conns_num
)
1268 smc_lgr_schedule_free_work(lgr
);
1271 smcr_link_put(conn
->lnk
); /* link_hold in smc_conn_create() */
1272 smc_lgr_put(lgr
); /* lgr_hold in smc_conn_create() */
1275 /* unregister a link from a buf_desc */
1276 static void smcr_buf_unmap_link(struct smc_buf_desc
*buf_desc
, bool is_rmb
,
1277 struct smc_link
*lnk
)
1279 if (is_rmb
|| buf_desc
->is_vm
)
1280 buf_desc
->is_reg_mr
[lnk
->link_idx
] = false;
1281 if (!buf_desc
->is_map_ib
[lnk
->link_idx
])
1284 if ((is_rmb
|| buf_desc
->is_vm
) &&
1285 buf_desc
->mr
[lnk
->link_idx
]) {
1286 smc_ib_put_memory_region(buf_desc
->mr
[lnk
->link_idx
]);
1287 buf_desc
->mr
[lnk
->link_idx
] = NULL
;
1290 smc_ib_buf_unmap_sg(lnk
, buf_desc
, DMA_FROM_DEVICE
);
1292 smc_ib_buf_unmap_sg(lnk
, buf_desc
, DMA_TO_DEVICE
);
1294 sg_free_table(&buf_desc
->sgt
[lnk
->link_idx
]);
1295 buf_desc
->is_map_ib
[lnk
->link_idx
] = false;
1298 /* unmap all buffers of lgr for a deleted link */
1299 static void smcr_buf_unmap_lgr(struct smc_link
*lnk
)
1301 struct smc_link_group
*lgr
= lnk
->lgr
;
1302 struct smc_buf_desc
*buf_desc
, *bf
;
1305 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
1306 down_write(&lgr
->rmbs_lock
);
1307 list_for_each_entry_safe(buf_desc
, bf
, &lgr
->rmbs
[i
], list
)
1308 smcr_buf_unmap_link(buf_desc
, true, lnk
);
1309 up_write(&lgr
->rmbs_lock
);
1311 down_write(&lgr
->sndbufs_lock
);
1312 list_for_each_entry_safe(buf_desc
, bf
, &lgr
->sndbufs
[i
],
1314 smcr_buf_unmap_link(buf_desc
, false, lnk
);
1315 up_write(&lgr
->sndbufs_lock
);
1319 static void smcr_rtoken_clear_link(struct smc_link
*lnk
)
1321 struct smc_link_group
*lgr
= lnk
->lgr
;
1324 for (i
= 0; i
< SMC_RMBS_PER_LGR_MAX
; i
++) {
1325 lgr
->rtokens
[i
][lnk
->link_idx
].rkey
= 0;
1326 lgr
->rtokens
[i
][lnk
->link_idx
].dma_addr
= 0;
1330 static void __smcr_link_clear(struct smc_link
*lnk
)
1332 struct smc_link_group
*lgr
= lnk
->lgr
;
1333 struct smc_ib_device
*smcibdev
;
1335 smc_wr_free_link_mem(lnk
);
1336 smc_ibdev_cnt_dec(lnk
);
1337 put_device(&lnk
->smcibdev
->ibdev
->dev
);
1338 smcibdev
= lnk
->smcibdev
;
1339 memset(lnk
, 0, sizeof(struct smc_link
));
1340 lnk
->state
= SMC_LNK_UNUSED
;
1341 if (!atomic_dec_return(&smcibdev
->lnk_cnt
))
1342 wake_up(&smcibdev
->lnks_deleted
);
1343 smc_lgr_put(lgr
); /* lgr_hold in smcr_link_init() */
1346 /* must be called under lgr->llc_conf_mutex lock */
1347 void smcr_link_clear(struct smc_link
*lnk
, bool log
)
1349 if (!lnk
->lgr
|| lnk
->clearing
||
1350 lnk
->state
== SMC_LNK_UNUSED
)
1354 smc_llc_link_clear(lnk
, log
);
1355 smcr_buf_unmap_lgr(lnk
);
1356 smcr_rtoken_clear_link(lnk
);
1357 smc_ib_modify_qp_error(lnk
);
1358 smc_wr_free_link(lnk
);
1359 smc_ib_destroy_queue_pair(lnk
);
1360 smc_ib_dealloc_protection_domain(lnk
);
1361 smcr_link_put(lnk
); /* theoretically last link_put */
1364 void smcr_link_hold(struct smc_link
*lnk
)
1366 refcount_inc(&lnk
->refcnt
);
1369 void smcr_link_put(struct smc_link
*lnk
)
1371 if (refcount_dec_and_test(&lnk
->refcnt
))
1372 __smcr_link_clear(lnk
);
1375 static void smcr_buf_free(struct smc_link_group
*lgr
, bool is_rmb
,
1376 struct smc_buf_desc
*buf_desc
)
1380 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++)
1381 smcr_buf_unmap_link(buf_desc
, is_rmb
, &lgr
->lnk
[i
]);
1383 if (!buf_desc
->is_vm
&& buf_desc
->pages
)
1384 __free_pages(buf_desc
->pages
, buf_desc
->order
);
1385 else if (buf_desc
->is_vm
&& buf_desc
->cpu_addr
)
1386 vfree(buf_desc
->cpu_addr
);
1390 static void smcd_buf_free(struct smc_link_group
*lgr
, bool is_dmb
,
1391 struct smc_buf_desc
*buf_desc
)
1394 /* restore original buf len */
1395 buf_desc
->len
+= sizeof(struct smcd_cdc_msg
);
1396 smc_ism_unregister_dmb(lgr
->smcd
, buf_desc
);
1398 kfree(buf_desc
->cpu_addr
);
1403 static void smc_buf_free(struct smc_link_group
*lgr
, bool is_rmb
,
1404 struct smc_buf_desc
*buf_desc
)
1407 smcd_buf_free(lgr
, is_rmb
, buf_desc
);
1409 smcr_buf_free(lgr
, is_rmb
, buf_desc
);
1412 static void __smc_lgr_free_bufs(struct smc_link_group
*lgr
, bool is_rmb
)
1414 struct smc_buf_desc
*buf_desc
, *bf_desc
;
1415 struct list_head
*buf_list
;
1418 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
1420 buf_list
= &lgr
->rmbs
[i
];
1422 buf_list
= &lgr
->sndbufs
[i
];
1423 list_for_each_entry_safe(buf_desc
, bf_desc
, buf_list
,
1425 smc_lgr_buf_list_del(lgr
, is_rmb
, buf_desc
);
1426 smc_buf_free(lgr
, is_rmb
, buf_desc
);
1431 static void smc_lgr_free_bufs(struct smc_link_group
*lgr
)
1433 /* free send buffers */
1434 __smc_lgr_free_bufs(lgr
, false);
1436 __smc_lgr_free_bufs(lgr
, true);
1439 /* won't be freed until no one accesses to lgr anymore */
1440 static void __smc_lgr_free(struct smc_link_group
*lgr
)
1442 smc_lgr_free_bufs(lgr
);
1444 if (!atomic_dec_return(&lgr
->smcd
->lgr_cnt
))
1445 wake_up(&lgr
->smcd
->lgrs_deleted
);
1447 smc_wr_free_lgr_mem(lgr
);
1448 if (!atomic_dec_return(&lgr_cnt
))
1449 wake_up(&lgrs_deleted
);
1454 /* remove a link group */
1455 static void smc_lgr_free(struct smc_link_group
*lgr
)
1459 if (!lgr
->is_smcd
) {
1460 down_write(&lgr
->llc_conf_mutex
);
1461 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1462 if (lgr
->lnk
[i
].state
!= SMC_LNK_UNUSED
)
1463 smcr_link_clear(&lgr
->lnk
[i
], false);
1465 up_write(&lgr
->llc_conf_mutex
);
1466 smc_llc_lgr_clear(lgr
);
1469 destroy_workqueue(lgr
->tx_wq
);
1471 smc_ism_put_vlan(lgr
->smcd
, lgr
->vlan_id
);
1472 put_device(lgr
->smcd
->ops
->get_dev(lgr
->smcd
));
1474 smc_lgr_put(lgr
); /* theoretically last lgr_put */
1477 void smc_lgr_hold(struct smc_link_group
*lgr
)
1479 refcount_inc(&lgr
->refcnt
);
1482 void smc_lgr_put(struct smc_link_group
*lgr
)
1484 if (refcount_dec_and_test(&lgr
->refcnt
))
1485 __smc_lgr_free(lgr
);
1488 static void smc_sk_wake_ups(struct smc_sock
*smc
)
1490 smc
->sk
.sk_write_space(&smc
->sk
);
1491 smc
->sk
.sk_data_ready(&smc
->sk
);
1492 smc
->sk
.sk_state_change(&smc
->sk
);
1495 /* kill a connection */
1496 static void smc_conn_kill(struct smc_connection
*conn
, bool soft
)
1498 struct smc_sock
*smc
= container_of(conn
, struct smc_sock
, conn
);
1500 if (conn
->lgr
->is_smcd
&& conn
->lgr
->peer_shutdown
)
1501 conn
->local_tx_ctrl
.conn_state_flags
.peer_conn_abort
= 1;
1503 smc_close_abort(conn
);
1505 smc
->sk
.sk_err
= ECONNABORTED
;
1506 smc_sk_wake_ups(smc
);
1507 if (conn
->lgr
->is_smcd
) {
1508 smc_ism_unset_conn(conn
);
1509 if (smc_ism_support_dmb_nocopy(conn
->lgr
->smcd
))
1510 smcd_buf_detach(conn
);
1512 tasklet_kill(&conn
->rx_tsklet
);
1514 tasklet_unlock_wait(&conn
->rx_tsklet
);
1516 smc_cdc_wait_pend_tx_wr(conn
);
1518 smc_lgr_unregister_conn(conn
);
1519 smc_close_active_abort(smc
);
1522 static void smc_lgr_cleanup(struct smc_link_group
*lgr
)
1525 smc_ism_signal_shutdown(lgr
);
1527 u32 rsn
= lgr
->llc_termination_rsn
;
1530 rsn
= SMC_LLC_DEL_PROG_INIT_TERM
;
1531 smc_llc_send_link_delete_all(lgr
, false, rsn
);
1532 smcr_lgr_link_deactivate_all(lgr
);
1536 /* terminate link group
1537 * @soft: true if link group shutdown can take its time
1538 * false if immediate link group shutdown is required
1540 static void __smc_lgr_terminate(struct smc_link_group
*lgr
, bool soft
)
1542 struct smc_connection
*conn
;
1543 struct smc_sock
*smc
;
1544 struct rb_node
*node
;
1546 if (lgr
->terminating
)
1547 return; /* lgr already terminating */
1548 /* cancel free_work sync, will terminate when lgr->freeing is set */
1549 cancel_delayed_work(&lgr
->free_work
);
1550 lgr
->terminating
= 1;
1552 /* kill remaining link group connections */
1553 read_lock_bh(&lgr
->conns_lock
);
1554 node
= rb_first(&lgr
->conns_all
);
1556 read_unlock_bh(&lgr
->conns_lock
);
1557 conn
= rb_entry(node
, struct smc_connection
, alert_node
);
1558 smc
= container_of(conn
, struct smc_sock
, conn
);
1559 sock_hold(&smc
->sk
); /* sock_put below */
1560 lock_sock(&smc
->sk
);
1561 smc_conn_kill(conn
, soft
);
1562 release_sock(&smc
->sk
);
1563 sock_put(&smc
->sk
); /* sock_hold above */
1564 read_lock_bh(&lgr
->conns_lock
);
1565 node
= rb_first(&lgr
->conns_all
);
1567 read_unlock_bh(&lgr
->conns_lock
);
1568 smc_lgr_cleanup(lgr
);
1572 /* unlink link group and schedule termination */
1573 void smc_lgr_terminate_sched(struct smc_link_group
*lgr
)
1575 spinlock_t
*lgr_lock
;
1577 smc_lgr_list_head(lgr
, &lgr_lock
);
1578 spin_lock_bh(lgr_lock
);
1579 if (list_empty(&lgr
->list
) || lgr
->terminating
|| lgr
->freeing
) {
1580 spin_unlock_bh(lgr_lock
);
1581 return; /* lgr already terminating */
1583 list_del_init(&lgr
->list
);
1585 spin_unlock_bh(lgr_lock
);
1586 schedule_work(&lgr
->terminate_work
);
1589 /* Called when peer lgr shutdown (regularly or abnormally) is received */
1590 void smc_smcd_terminate(struct smcd_dev
*dev
, struct smcd_gid
*peer_gid
,
1591 unsigned short vlan
)
1593 struct smc_link_group
*lgr
, *l
;
1594 LIST_HEAD(lgr_free_list
);
1596 /* run common cleanup function and build free list */
1597 spin_lock_bh(&dev
->lgr_lock
);
1598 list_for_each_entry_safe(lgr
, l
, &dev
->lgr_list
, list
) {
1599 if ((!peer_gid
->gid
||
1600 (lgr
->peer_gid
.gid
== peer_gid
->gid
&&
1601 !smc_ism_is_emulated(dev
) ? 1 :
1602 lgr
->peer_gid
.gid_ext
== peer_gid
->gid_ext
)) &&
1603 (vlan
== VLAN_VID_MASK
|| lgr
->vlan_id
== vlan
)) {
1604 if (peer_gid
->gid
) /* peer triggered termination */
1605 lgr
->peer_shutdown
= 1;
1606 list_move(&lgr
->list
, &lgr_free_list
);
1610 spin_unlock_bh(&dev
->lgr_lock
);
1612 /* cancel the regular free workers and actually free lgrs */
1613 list_for_each_entry_safe(lgr
, l
, &lgr_free_list
, list
) {
1614 list_del_init(&lgr
->list
);
1615 schedule_work(&lgr
->terminate_work
);
1619 /* Called when an SMCD device is removed or the smc module is unloaded */
1620 void smc_smcd_terminate_all(struct smcd_dev
*smcd
)
1622 struct smc_link_group
*lgr
, *lg
;
1623 LIST_HEAD(lgr_free_list
);
1625 spin_lock_bh(&smcd
->lgr_lock
);
1626 list_splice_init(&smcd
->lgr_list
, &lgr_free_list
);
1627 list_for_each_entry(lgr
, &lgr_free_list
, list
)
1629 spin_unlock_bh(&smcd
->lgr_lock
);
1631 list_for_each_entry_safe(lgr
, lg
, &lgr_free_list
, list
) {
1632 list_del_init(&lgr
->list
);
1633 __smc_lgr_terminate(lgr
, false);
1636 if (atomic_read(&smcd
->lgr_cnt
))
1637 wait_event(smcd
->lgrs_deleted
, !atomic_read(&smcd
->lgr_cnt
));
1640 /* Called when an SMCR device is removed or the smc module is unloaded.
1641 * If smcibdev is given, all SMCR link groups using this device are terminated.
1642 * If smcibdev is NULL, all SMCR link groups are terminated.
1644 void smc_smcr_terminate_all(struct smc_ib_device
*smcibdev
)
1646 struct smc_link_group
*lgr
, *lg
;
1647 LIST_HEAD(lgr_free_list
);
1650 spin_lock_bh(&smc_lgr_list
.lock
);
1652 list_splice_init(&smc_lgr_list
.list
, &lgr_free_list
);
1653 list_for_each_entry(lgr
, &lgr_free_list
, list
)
1656 list_for_each_entry_safe(lgr
, lg
, &smc_lgr_list
.list
, list
) {
1657 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1658 if (lgr
->lnk
[i
].smcibdev
== smcibdev
)
1659 smcr_link_down_cond_sched(&lgr
->lnk
[i
]);
1663 spin_unlock_bh(&smc_lgr_list
.lock
);
1665 list_for_each_entry_safe(lgr
, lg
, &lgr_free_list
, list
) {
1666 list_del_init(&lgr
->list
);
1667 smc_llc_set_termination_rsn(lgr
, SMC_LLC_DEL_OP_INIT_TERM
);
1668 __smc_lgr_terminate(lgr
, false);
1672 if (atomic_read(&smcibdev
->lnk_cnt
))
1673 wait_event(smcibdev
->lnks_deleted
,
1674 !atomic_read(&smcibdev
->lnk_cnt
));
1676 if (atomic_read(&lgr_cnt
))
1677 wait_event(lgrs_deleted
, !atomic_read(&lgr_cnt
));
1681 /* set new lgr type and clear all asymmetric link tagging */
1682 void smcr_lgr_set_type(struct smc_link_group
*lgr
, enum smc_lgr_type new_type
)
1684 char *lgr_type
= "";
1687 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++)
1688 if (smc_link_usable(&lgr
->lnk
[i
]))
1689 lgr
->lnk
[i
].link_is_asym
= false;
1690 if (lgr
->type
== new_type
)
1692 lgr
->type
= new_type
;
1694 switch (lgr
->type
) {
1698 case SMC_LGR_SINGLE
:
1699 lgr_type
= "SINGLE";
1701 case SMC_LGR_SYMMETRIC
:
1702 lgr_type
= "SYMMETRIC";
1704 case SMC_LGR_ASYMMETRIC_PEER
:
1705 lgr_type
= "ASYMMETRIC_PEER";
1707 case SMC_LGR_ASYMMETRIC_LOCAL
:
1708 lgr_type
= "ASYMMETRIC_LOCAL";
1711 pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
1712 "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE
, &lgr
->id
,
1713 lgr
->net
->net_cookie
, lgr_type
, lgr
->pnet_id
);
1716 /* set new lgr type and tag a link as asymmetric */
1717 void smcr_lgr_set_type_asym(struct smc_link_group
*lgr
,
1718 enum smc_lgr_type new_type
, int asym_lnk_idx
)
1720 smcr_lgr_set_type(lgr
, new_type
);
1721 lgr
->lnk
[asym_lnk_idx
].link_is_asym
= true;
1724 /* abort connection, abort_work scheduled from tasklet context */
1725 static void smc_conn_abort_work(struct work_struct
*work
)
1727 struct smc_connection
*conn
= container_of(work
,
1728 struct smc_connection
,
1730 struct smc_sock
*smc
= container_of(conn
, struct smc_sock
, conn
);
1732 lock_sock(&smc
->sk
);
1733 smc_conn_kill(conn
, true);
1734 release_sock(&smc
->sk
);
1735 sock_put(&smc
->sk
); /* sock_hold done by schedulers of abort_work */
1738 void smcr_port_add(struct smc_ib_device
*smcibdev
, u8 ibport
)
1740 struct smc_link_group
*lgr
, *n
;
1742 spin_lock_bh(&smc_lgr_list
.lock
);
1743 list_for_each_entry_safe(lgr
, n
, &smc_lgr_list
.list
, list
) {
1744 struct smc_link
*link
;
1746 if (strncmp(smcibdev
->pnetid
[ibport
- 1], lgr
->pnet_id
,
1747 SMC_MAX_PNETID_LEN
) ||
1748 lgr
->type
== SMC_LGR_SYMMETRIC
||
1749 lgr
->type
== SMC_LGR_ASYMMETRIC_PEER
||
1750 !rdma_dev_access_netns(smcibdev
->ibdev
, lgr
->net
))
1753 if (lgr
->type
== SMC_LGR_SINGLE
&& lgr
->max_links
<= 1)
1756 /* trigger local add link processing */
1757 link
= smc_llc_usable_link(lgr
);
1759 smc_llc_add_link_local(link
);
1761 spin_unlock_bh(&smc_lgr_list
.lock
);
1764 /* link is down - switch connections to alternate link,
1765 * must be called under lgr->llc_conf_mutex lock
1767 static void smcr_link_down(struct smc_link
*lnk
)
1769 struct smc_link_group
*lgr
= lnk
->lgr
;
1770 struct smc_link
*to_lnk
;
1773 if (!lgr
|| lnk
->state
== SMC_LNK_UNUSED
|| list_empty(&lgr
->list
))
1776 to_lnk
= smc_switch_conns(lgr
, lnk
, true);
1777 if (!to_lnk
) { /* no backup link available */
1778 smcr_link_clear(lnk
, true);
1781 smcr_lgr_set_type(lgr
, SMC_LGR_SINGLE
);
1782 del_link_id
= lnk
->link_id
;
1784 if (lgr
->role
== SMC_SERV
) {
1785 /* trigger local delete link processing */
1786 smc_llc_srv_delete_link_local(to_lnk
, del_link_id
);
1788 if (lgr
->llc_flow_lcl
.type
!= SMC_LLC_FLOW_NONE
) {
1789 /* another llc task is ongoing */
1790 up_write(&lgr
->llc_conf_mutex
);
1791 wait_event_timeout(lgr
->llc_flow_waiter
,
1792 (list_empty(&lgr
->list
) ||
1793 lgr
->llc_flow_lcl
.type
== SMC_LLC_FLOW_NONE
),
1795 down_write(&lgr
->llc_conf_mutex
);
1797 if (!list_empty(&lgr
->list
)) {
1798 smc_llc_send_delete_link(to_lnk
, del_link_id
,
1800 SMC_LLC_DEL_LOST_PATH
);
1801 smcr_link_clear(lnk
, true);
1803 wake_up(&lgr
->llc_flow_waiter
); /* wake up next waiter */
1807 /* must be called under lgr->llc_conf_mutex lock */
1808 void smcr_link_down_cond(struct smc_link
*lnk
)
1810 if (smc_link_downing(&lnk
->state
)) {
1811 trace_smcr_link_down(lnk
, __builtin_return_address(0));
1812 smcr_link_down(lnk
);
1816 /* will get the lgr->llc_conf_mutex lock */
1817 void smcr_link_down_cond_sched(struct smc_link
*lnk
)
1819 if (smc_link_downing(&lnk
->state
)) {
1820 trace_smcr_link_down(lnk
, __builtin_return_address(0));
1821 schedule_work(&lnk
->link_down_wrk
);
1825 void smcr_port_err(struct smc_ib_device
*smcibdev
, u8 ibport
)
1827 struct smc_link_group
*lgr
, *n
;
1830 list_for_each_entry_safe(lgr
, n
, &smc_lgr_list
.list
, list
) {
1831 if (strncmp(smcibdev
->pnetid
[ibport
- 1], lgr
->pnet_id
,
1832 SMC_MAX_PNETID_LEN
))
1833 continue; /* lgr is not affected */
1834 if (list_empty(&lgr
->list
))
1836 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1837 struct smc_link
*lnk
= &lgr
->lnk
[i
];
1839 if (smc_link_usable(lnk
) &&
1840 lnk
->smcibdev
== smcibdev
&& lnk
->ibport
== ibport
)
1841 smcr_link_down_cond_sched(lnk
);
1846 static void smc_link_down_work(struct work_struct
*work
)
1848 struct smc_link
*link
= container_of(work
, struct smc_link
,
1850 struct smc_link_group
*lgr
= link
->lgr
;
1852 if (list_empty(&lgr
->list
))
1854 wake_up_all(&lgr
->llc_msg_waiter
);
1855 down_write(&lgr
->llc_conf_mutex
);
1856 smcr_link_down(link
);
1857 up_write(&lgr
->llc_conf_mutex
);
1860 static int smc_vlan_by_tcpsk_walk(struct net_device
*lower_dev
,
1861 struct netdev_nested_priv
*priv
)
1863 unsigned short *vlan_id
= (unsigned short *)priv
->data
;
1865 if (is_vlan_dev(lower_dev
)) {
1866 *vlan_id
= vlan_dev_vlan_id(lower_dev
);
1873 /* Determine vlan of internal TCP socket. */
1874 int smc_vlan_by_tcpsk(struct socket
*clcsock
, struct smc_init_info
*ini
)
1876 struct dst_entry
*dst
= sk_dst_get(clcsock
->sk
);
1877 struct netdev_nested_priv priv
;
1878 struct net_device
*ndev
;
1892 if (is_vlan_dev(ndev
)) {
1893 ini
->vlan_id
= vlan_dev_vlan_id(ndev
);
1897 priv
.data
= (void *)&ini
->vlan_id
;
1899 netdev_walk_all_lower_dev(ndev
, smc_vlan_by_tcpsk_walk
, &priv
);
1908 static bool smcr_lgr_match(struct smc_link_group
*lgr
, u8 smcr_version
,
1912 enum smc_lgr_role role
, u32 clcqpn
,
1915 struct smc_link
*lnk
;
1918 if (memcmp(lgr
->peer_systemid
, peer_systemid
, SMC_SYSTEMID_LEN
) ||
1922 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1925 if (!smc_link_active(lnk
))
1927 /* use verbs API to check netns, instead of lgr->net */
1928 if (!rdma_dev_access_netns(lnk
->smcibdev
->ibdev
, net
))
1930 if ((lgr
->role
== SMC_SERV
|| lnk
->peer_qpn
== clcqpn
) &&
1931 !memcmp(lnk
->peer_gid
, peer_gid
, SMC_GID_SIZE
) &&
1932 (smcr_version
== SMC_V2
||
1933 !memcmp(lnk
->peer_mac
, peer_mac_v1
, ETH_ALEN
)))
1939 static bool smcd_lgr_match(struct smc_link_group
*lgr
,
1940 struct smcd_dev
*smcismdev
,
1941 struct smcd_gid
*peer_gid
)
1943 if (lgr
->peer_gid
.gid
!= peer_gid
->gid
||
1944 lgr
->smcd
!= smcismdev
)
1947 if (smc_ism_is_emulated(smcismdev
) &&
1948 lgr
->peer_gid
.gid_ext
!= peer_gid
->gid_ext
)
1954 /* create a new SMC connection (and a new link group if necessary) */
1955 int smc_conn_create(struct smc_sock
*smc
, struct smc_init_info
*ini
)
1957 struct smc_connection
*conn
= &smc
->conn
;
1958 struct net
*net
= sock_net(&smc
->sk
);
1959 struct list_head
*lgr_list
;
1960 struct smc_link_group
*lgr
;
1961 enum smc_lgr_role role
;
1962 spinlock_t
*lgr_lock
;
1965 lgr_list
= ini
->is_smcd
? &ini
->ism_dev
[ini
->ism_selected
]->lgr_list
:
1967 lgr_lock
= ini
->is_smcd
? &ini
->ism_dev
[ini
->ism_selected
]->lgr_lock
:
1969 ini
->first_contact_local
= 1;
1970 role
= smc
->listen_smc
? SMC_SERV
: SMC_CLNT
;
1971 if (role
== SMC_CLNT
&& ini
->first_contact_peer
)
1972 /* create new link group as well */
1975 /* determine if an existing link group can be reused */
1976 spin_lock_bh(lgr_lock
);
1977 list_for_each_entry(lgr
, lgr_list
, list
) {
1978 write_lock_bh(&lgr
->conns_lock
);
1980 smcd_lgr_match(lgr
, ini
->ism_dev
[ini
->ism_selected
],
1981 &ini
->ism_peer_gid
[ini
->ism_selected
]) :
1982 smcr_lgr_match(lgr
, ini
->smcr_version
,
1984 ini
->peer_gid
, ini
->peer_mac
, role
,
1985 ini
->ib_clcqpn
, net
)) &&
1987 (ini
->smcd_version
== SMC_V2
||
1988 lgr
->vlan_id
== ini
->vlan_id
) &&
1989 (role
== SMC_CLNT
|| ini
->is_smcd
||
1990 (lgr
->conns_num
< lgr
->max_conns
&&
1991 !bitmap_full(lgr
->rtokens_used_mask
, SMC_RMBS_PER_LGR_MAX
)))) {
1992 /* link group found */
1993 ini
->first_contact_local
= 0;
1995 rc
= smc_lgr_register_conn(conn
, false);
1996 write_unlock_bh(&lgr
->conns_lock
);
1997 if (!rc
&& delayed_work_pending(&lgr
->free_work
))
1998 cancel_delayed_work(&lgr
->free_work
);
2001 write_unlock_bh(&lgr
->conns_lock
);
2003 spin_unlock_bh(lgr_lock
);
2007 if (role
== SMC_CLNT
&& !ini
->first_contact_peer
&&
2008 ini
->first_contact_local
) {
2009 /* Server reuses a link group, but Client wants to start
2011 * send out_of_sync decline, reason synchr. error
2013 return SMC_CLC_DECL_SYNCERR
;
2017 if (ini
->first_contact_local
) {
2018 rc
= smc_lgr_create(smc
, ini
);
2022 write_lock_bh(&lgr
->conns_lock
);
2023 rc
= smc_lgr_register_conn(conn
, true);
2024 write_unlock_bh(&lgr
->conns_lock
);
2026 smc_lgr_cleanup_early(lgr
);
2030 smc_lgr_hold(conn
->lgr
); /* lgr_put in smc_conn_free() */
2031 if (!conn
->lgr
->is_smcd
)
2032 smcr_link_hold(conn
->lnk
); /* link_put in smc_conn_free() */
2034 conn
->local_tx_ctrl
.common
.type
= SMC_CDC_MSG_TYPE
;
2035 conn
->local_tx_ctrl
.len
= SMC_WR_TX_SIZE
;
2036 conn
->urg_state
= SMC_URG_READ
;
2037 init_waitqueue_head(&conn
->cdc_pend_tx_wq
);
2038 INIT_WORK(&smc
->conn
.abort_work
, smc_conn_abort_work
);
2040 conn
->rx_off
= sizeof(struct smcd_cdc_msg
);
2041 smcd_cdc_rx_init(conn
); /* init tasklet for this conn */
2045 #ifndef KERNEL_HAS_ATOMIC64
2046 spin_lock_init(&conn
->acurs_lock
);
#define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
#define SMCR_RMBE_SIZES		15 /* 0 -> 16KB, 1 -> 32KB, .. 15 -> 512MB */

/* convert the RMB size into the compressed notation (minimum 16K, see
 * SMCD/R_DMBE_SIZES.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;  /* convert to 16K multiple */
	compressed = min_t(u8, ilog2(size) + 1,
			   is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);

#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (!is_smcd && is_rmb)
		/* RMBs are backed by & limited to max size of scatterlists */
		compressed = min_t(u8, compressed,
				   ilog2((SG_MAX_SINGLE_ALLOC * PAGE_SIZE) >> 14));
#endif

	return compressed;
}
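/* Worked example (added, not in the original source): a request for 100000
 * bytes becomes (100000 - 1) >> 14 = 6, and ilog2(6) + 1 = 3, i.e. the
 * compressed value 3; smc_uncompress_bufsize(3) below yields 16K << 3 =
 * 128 KiB, so the application gets at least the size it asked for.
 */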
/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct rw_semaphore *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	down_read(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			up_read(lock);
			return buf_slot;
		}
	}
	up_read(lock);
	return NULL;
}
/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
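/* Worked example (added, not in the original source): for a 64 KiB RMB the
 * 10% term is 6553 bytes, which normally exceeds SOCK_MIN_SNDBUF / 2, so a
 * window update is only announced once at least ~6.4 KiB of receive buffer
 * space has been freed.
 */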
2119 /* map an buf to a link */
2120 static int smcr_buf_map_link(struct smc_buf_desc
*buf_desc
, bool is_rmb
,
2121 struct smc_link
*lnk
)
2123 int rc
, i
, nents
, offset
, buf_size
, size
, access_flags
;
2124 struct scatterlist
*sg
;
2127 if (buf_desc
->is_map_ib
[lnk
->link_idx
])
2130 if (buf_desc
->is_vm
) {
2131 buf
= buf_desc
->cpu_addr
;
2132 buf_size
= buf_desc
->len
;
2133 offset
= offset_in_page(buf_desc
->cpu_addr
);
2134 nents
= PAGE_ALIGN(buf_size
+ offset
) / PAGE_SIZE
;
2139 rc
= sg_alloc_table(&buf_desc
->sgt
[lnk
->link_idx
], nents
, GFP_KERNEL
);
2143 if (buf_desc
->is_vm
) {
2144 /* virtually contiguous buffer */
2145 for_each_sg(buf_desc
->sgt
[lnk
->link_idx
].sgl
, sg
, nents
, i
) {
2146 size
= min_t(int, PAGE_SIZE
- offset
, buf_size
);
2147 sg_set_page(sg
, vmalloc_to_page(buf
), size
, offset
);
2148 buf
+= size
/ sizeof(*buf
);
2153 /* physically contiguous buffer */
2154 sg_set_buf(buf_desc
->sgt
[lnk
->link_idx
].sgl
,
2155 buf_desc
->cpu_addr
, buf_desc
->len
);
2158 /* map sg table to DMA address */
2159 rc
= smc_ib_buf_map_sg(lnk
, buf_desc
,
2160 is_rmb
? DMA_FROM_DEVICE
: DMA_TO_DEVICE
);
2161 /* SMC protocol depends on mapping to one DMA address only */
2167 buf_desc
->is_dma_need_sync
|=
2168 smc_ib_is_sg_need_sync(lnk
, buf_desc
) << lnk
->link_idx
;
2170 if (is_rmb
|| buf_desc
->is_vm
) {
2171 /* create a new memory region for the RMB or vzalloced sndbuf */
2172 access_flags
= is_rmb
?
2173 IB_ACCESS_REMOTE_WRITE
| IB_ACCESS_LOCAL_WRITE
:
2174 IB_ACCESS_LOCAL_WRITE
;
2176 rc
= smc_ib_get_memory_region(lnk
->roce_pd
, access_flags
,
2177 buf_desc
, lnk
->link_idx
);
2180 smc_ib_sync_sg_for_device(lnk
, buf_desc
,
2181 is_rmb
? DMA_FROM_DEVICE
: DMA_TO_DEVICE
);
2183 buf_desc
->is_map_ib
[lnk
->link_idx
] = true;
2187 smc_ib_buf_unmap_sg(lnk
, buf_desc
,
2188 is_rmb
? DMA_FROM_DEVICE
: DMA_TO_DEVICE
);
2190 sg_free_table(&buf_desc
->sgt
[lnk
->link_idx
]);
/* register a new buf on IB device, rmb or vzalloced sndbuf
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!buf_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new buf */
		if (buf_desc->is_vm)
			buf_desc->mr[link->link_idx]->iova =
				(uintptr_t)buf_desc->cpu_addr;
		if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
			buf_desc->is_reg_err = true;
			return -EFAULT;
		}
		buf_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

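/* Callers must already serialize against link reconfiguration via
 * lgr->llc_conf_mutex, as noted above. Registration is idempotent per link:
 * is_reg_mr[link_idx] turns a second call for the same buffer into a no-op,
 * so smcr_buf_reg_lgr() below can safely walk complete buffer lists.
 */
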
static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	down_write(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	up_write(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

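/* Typical sequence when a new link is brought up (a sketch based on the two
 * helpers in this file, not a fixed API contract):
 *
 *	rc = smcr_buf_map_lgr(lnk);	map all used RMBs and sndbufs
 *	if (!rc)
 *		rc = smcr_buf_reg_lgr(lnk);	then register them on the link
 */
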
/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	/* reg all RMBs for a new link */
	down_write(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_buf(lnk, buf_desc);
			if (rc) {
				up_write(&lgr->rmbs_lock);
				return rc;
			}
		}
	}
	up_write(&lgr->rmbs_lock);

	if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
		return rc;

	/* reg all vzalloced sndbufs for a new link */
	down_write(&lgr->sndbufs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
			if (!buf_desc->used || !buf_desc->is_vm)
				continue;
			rc = smcr_link_reg_buf(lnk, buf_desc);
			if (rc) {
				up_write(&lgr->sndbufs_lock);
				return rc;
			}
		}
	}
	up_write(&lgr->sndbufs_lock);
	return rc;
}

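/* Note: only RMBs and vzalloc'ed sndbufs need a memory region and
 * registration; physically contiguous sndbufs are only read locally as the
 * source of RDMA writes, so the second loop is skipped entirely for
 * SMCR_PHYS_CONT_BUFS link groups.
 */
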
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	switch (lgr->buf_type) {
	case SMCR_PHYS_CONT_BUFS:
	case SMCR_MIXED_BUFS:
		buf_desc->order = get_order(bufsize);
		buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
					      __GFP_NOMEMALLOC | __GFP_COMP |
					      __GFP_NORETRY | __GFP_ZERO,
					      buf_desc->order);
		if (buf_desc->pages) {
			buf_desc->cpu_addr =
				(void *)page_address(buf_desc->pages);
			buf_desc->len = bufsize;
			buf_desc->is_vm = false;
			break;
		}
		if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
			goto out;
		fallthrough;	// try virtually contiguous buf
	case SMCR_VIRT_CONT_BUFS:
		buf_desc->order = get_order(bufsize);
		buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
		if (!buf_desc->cpu_addr)
			goto out;
		buf_desc->pages = NULL;
		buf_desc->len = bufsize;
		buf_desc->is_vm = true;
		break;
	}
	return buf_desc;

out:
	kfree(buf_desc);
	return ERR_PTR(-EAGAIN);
}

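/* Allocation strategy per lgr->buf_type, as implemented above:
 *
 *	SMCR_PHYS_CONT_BUFS	alloc_pages() only
 *	SMCR_MIXED_BUFS		alloc_pages() first, fall back to vzalloc()
 *	SMCR_VIRT_CONT_BUFS	vzalloc() only
 *
 * Returning -EAGAIN lets __smc_buf_create() retry with the next smaller
 * buffer size instead of failing the connection outright.
 */
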
/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0, cnt = 0;

	/* protect against parallel link reconfiguration */
	down_read(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
		cnt++;
	}
out:
	up_read(&lgr->llc_conf_mutex);
	if (!rc && !cnt)
		rc = -EINVAL;
	return rc;
}

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			if (rc == -ENOMEM)
				return ERR_PTR(-EAGAIN);
			if (rc == -ENOSPC)
				return ERR_PTR(-ENOSPC);
			return ERR_PTR(-EIO);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

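/* For SMC-D a DMB is registered with the ISM device and the first
 * sizeof(struct smcd_cdc_msg) bytes are reserved for the CDC header, so
 * buf_desc->len deliberately under-reports the real allocation. Non-DMB send
 * buffers are plain kzalloc'ed memory of the full bufsize.
 */
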
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_comp;
	struct rw_semaphore *lock;	/* lock buffer list */
	bool is_dgraded = false;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		bufsize = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		bufsize = smc->sk.sk_sndbuf / 2;

	for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
	     bufsize_comp >= 0; bufsize_comp--) {
		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_comp];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_comp];
		}
		bufsize = smc_uncompress_bufsize(bufsize_comp);

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
		if (buf_desc) {
			buf_desc->is_dma_need_sync = 0;
			SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
			SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc)) {
			if (!is_dgraded) {
				is_dgraded = true;
				SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
			}
			continue;
		}

		SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
		SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
		buf_desc->used = 1;
		down_write(lock);
		smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
		up_write(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return PTR_ERR(buf_desc);

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, is_rmb, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_comp = bufsize_comp;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

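/* Size negotiation sketch (numbers are illustrative): a socket with
 * sk_rcvbuf = 262144 starts at bufsize = 131072, i.e. bufsize_comp = 3; if
 * that allocation fails with anything other than -ENOMEM, the loop retries
 * with 64KB, 32KB and finally the 16KB minimum before giving up.
 */
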
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->sndbuf_desc->is_dma_need_sync)
		return;
	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
	    !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->rmb_desc->is_dma_need_sync)
		return;
	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	if (is_smcd &&
	    smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd))
		goto create_rmb;

	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;

create_rmb:
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc && smc->conn.sndbuf_desc) {
		down_write(&smc->conn.lgr->sndbufs_lock);
		smc_lgr_buf_list_del(smc->conn.lgr, false,
				     smc->conn.sndbuf_desc);
		up_write(&smc->conn.lgr->sndbufs_lock);
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
		smc->conn.sndbuf_desc = NULL;
	}
	return rc;
}

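/* Usage note: the sndbuf is created first and the RMB second; if RMB creation
 * fails, the already created sndbuf is unlinked and freed again so the
 * connection ends up with either both buffers or none. For SMC-D devices that
 * can attach the peer DMB directly (dmb_nocopy), the local sndbuf is skipped
 * and smcd_buf_attach() below provides the send buffer instead.
 */
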
int smcd_buf_attach(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct smcd_dev *smcd = conn->lgr->smcd;
	u64 peer_token = conn->peer_token;
	struct smc_buf_desc *buf_desc;
	int rc;

	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return -ENOMEM;

	/* The ghost sndbuf_desc describes the same memory region as
	 * peer RMB. Its lifecycle is consistent with the connection's
	 * and it will be freed with the connections instead of the
	 * link group.
	 */
	rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
	if (rc) {
		kfree(buf_desc);
		return rc;
	}

	smc->sk.sk_sndbuf = buf_desc->len;
	buf_desc->cpu_addr =
		(u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
	buf_desc->len -= sizeof(struct smcd_cdc_msg);
	conn->sndbuf_desc = buf_desc;
	conn->sndbuf_desc->used = 1;
	atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
	return 0;
}

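/* The attached "ghost" sndbuf mirrors the layout of a locally created DMB:
 * the CDC header at the start is skipped by advancing cpu_addr and shrinking
 * len by sizeof(struct smcd_cdc_msg), so writers only ever see the payload
 * area of the peer RMB.
 */
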
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

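/* rtoken bookkeeping sketch: lgr->rtokens[] is indexed by rtoken slot and by
 * link index, so one remote RMB keeps the same slot across all links of the
 * link group. Adding an (rkey, vaddr) pair that is already known for this
 * link simply returns the existing slot; smc_rtoken_set()/smc_rtoken_set2()
 * fill in the per-link entries when further links are added.
 */
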
/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
					  clc->r0.rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	mutex_unlock(&smc_ib_devices.mutex);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	mutex_unlock(&smcd_dev_list.mutex);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	mutex_unlock(&smcd_dev_list.mutex);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}