// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/smc.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"
#include "smc_netlink.h"
#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_down_work(struct work_struct *work);
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}
static void smc_ibdev_cnt_inc(struct smc_link *lnk)
{
	atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_ibdev_cnt_dec(struct smc_link *lnk)
{
	atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	mod_delayed_work(system_wq, &lgr->free_work,
			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
}
/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
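
/* Note on the data structure (added for clarity): conns_all is an rbtree
 * keyed by alert_token_local, so a connection can later be looked up by its
 * token (see smc_lgr_find_conn()) in O(log n) instead of scanning all
 * connections of the link group.
 */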
/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	atomic_inc(&conn->lnk->conn_cnt);
	return 0;
}
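
/* Load balancing note (added for clarity): on the server side a connection
 * arriving at an even conns_num keeps the first matching usable link, while
 * an odd conns_num makes the code look for the next matching link, so
 * connections are spread roughly evenly across two usable links.  A client's
 * choice here is only provisional - the final link is assigned by the
 * SMC server.
 */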
/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}
/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	if (!lgr->is_smcd)
		atomic_dec(&conn->lnk->conn_cnt);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}
/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	char hostname[SMC_MAX_HOSTNAME_LEN + 1];
	char smc_seid[SMC_MAX_EID_LEN + 1];
	struct smcd_dev *smcd_dev;
	struct nlattr *attrs;
	u8 *seid = NULL;
	u8 *host = NULL;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_SYS_INFO);
	if (!nlh)
		goto errmsg;
	if (cb_ctx->pos[0])
		goto errout;
	attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
	if (!attrs)
		goto errout;
	if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
		goto errattr;
	smc_clc_get_hostname(&host);
	if (host) {
		snprintf(hostname, sizeof(hostname), "%s", host);
		if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
			goto errattr;
	}
	mutex_lock(&smcd_dev_list.mutex);
	smcd_dev = list_first_entry_or_null(&smcd_dev_list.list,
					    struct smcd_dev, list);
	if (smcd_dev)
		smc_ism_get_system_eid(smcd_dev, &seid);
	mutex_unlock(&smcd_dev_list.mutex);
	if (seid && smc_ism_is_v2_capable()) {
		snprintf(smc_seid, sizeof(smc_seid), "%s", seid);
		if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
			goto errattr;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	cb_ctx->pos[0] = 1;
	return skb->len;
errattr:
	nla_nest_cancel(skb, attrs);
errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return skb->len;
}
static int smc_nl_fill_lgr(struct smc_link_group *lgr,
			   struct sk_buff *skb,
			   struct netlink_callback *cb)
{
	char smc_target[SMC_MAX_PNETID_LEN + 1];
	struct nlattr *attrs;

	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
	if (!attrs)
		goto errout;

	if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
		goto errattr;
	snprintf(smc_target, sizeof(smc_target), "%s", lgr->pnet_id);
	if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
		goto errattr;

	nla_nest_end(skb, attrs);
	return 0;
errattr:
	nla_nest_cancel(skb, attrs);
errout:
	return -EMSGSIZE;
}
static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
				struct smc_link *link,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	char smc_ibname[IB_DEVICE_NAME_MAX + 1];
	u8 smc_gid_target[41];
	struct nlattr *attrs;
	u32 link_uid = 0;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LINK_SMCR);
	if (!nlh)
		goto errmsg;

	attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
	if (!attrs)
		goto errout;

	if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
			atomic_read(&link->conn_cnt)))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
		goto errattr;
	snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
	if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
		goto errattr;
	memcpy(&link_uid, link->link_uid, sizeof(link_uid));
	if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
		goto errattr;
	memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
	if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
		goto errattr;
	memset(smc_gid_target, 0, sizeof(smc_gid_target));
	smc_gid_be16_convert(smc_gid_target, link->gid);
	if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
		goto errattr;
	memset(smc_gid_target, 0, sizeof(smc_gid_target));
	smc_gid_be16_convert(smc_gid_target, link->peer_gid);
	if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
		goto errattr;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	return 0;
errattr:
	nla_nest_cancel(skb, attrs);
errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return -EMSGSIZE;
}
static int smc_nl_handle_lgr(struct smc_link_group *lgr,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     bool list_links)
{
	void *nlh;
	int i;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LGR_SMCR);
	if (!nlh)
		goto errmsg;
	if (smc_nl_fill_lgr(lgr, skb, cb))
		goto errout;

	genlmsg_end(skb, nlh);
	if (!list_links)
		goto out;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&lgr->lnk[i]))
			continue;
		if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
			goto errout;
	}
out:
	return 0;

errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return -EMSGSIZE;
}
static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
				 struct sk_buff *skb,
				 struct netlink_callback *cb,
				 bool list_links)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smc_link_group *lgr;
	int snum = cb_ctx->pos[0];
	int num = 0;

	spin_lock_bh(&smc_lgr->lock);
	list_for_each_entry(lgr, &smc_lgr->list, list) {
		if (num < snum)
			goto next;
		if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
			goto errout;
next:
		num++;
	}
errout:
	spin_unlock_bh(&smc_lgr->lock);
	cb_ctx->pos[0] = num;
}
static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
	char smc_pnet[SMC_MAX_PNETID_LEN + 1];
	char smc_eid[SMC_MAX_EID_LEN + 1];
	struct nlattr *v2_attrs;
	struct nlattr *attrs;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LGR_SMCD);
	if (!nlh)
		goto errmsg;

	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
	if (!attrs)
		goto errout;

	if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
			      SMC_NLA_LGR_D_PAD))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
			      SMC_NLA_LGR_D_PAD))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
		goto errattr;
	snprintf(smc_pnet, sizeof(smc_pnet), "%s", lgr->smcd->pnetid);
	if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
		goto errattr;

	v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_V2);
	if (!v2_attrs)
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
		goto errv2attr;
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
		goto errv2attr;
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
		goto errv2attr;
	snprintf(smc_host, sizeof(smc_host), "%s", lgr->peer_hostname);
	if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
		goto errv2attr;
	snprintf(smc_eid, sizeof(smc_eid), "%s", lgr->negotiated_eid);
	if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
		goto errv2attr;

	nla_nest_end(skb, v2_attrs);
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	return 0;

errv2attr:
	nla_nest_cancel(skb, v2_attrs);
errattr:
	nla_nest_cancel(skb, attrs);
errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return -EMSGSIZE;
}
static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
				  struct sk_buff *skb,
				  struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smc_link_group *lgr;
	int snum = cb_ctx->pos[1];
	int rc = 0, num = 0;

	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry(lgr, &dev->lgr_list, list) {
		if (!lgr->is_smcd)
			continue;
		if (num < snum)
			goto next;
		rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
		if (rc)
			goto errout;
next:
		num++;
	}
errout:
	spin_unlock_bh(&dev->lgr_lock);
	cb_ctx->pos[1] = num;
	return rc;
}
static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smcd_dev *smcd_dev;
	int snum = cb_ctx->pos[0];
	int rc = 0, num = 0;

	mutex_lock(&dev_list->mutex);
	list_for_each_entry(smcd_dev, &dev_list->list, list) {
		if (list_empty(&smcd_dev->lgr_list))
			continue;
		if (num < snum)
			goto next;
		rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
		if (rc)
			goto errout;
next:
		num++;
	}
errout:
	mutex_unlock(&dev_list->mutex);
	cb_ctx->pos[0] = num;
	return rc;
}
int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
	bool list_links = false;

	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
	return skb->len;
}

int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
{
	bool list_links = true;

	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
	return skb->len;
}

int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
	smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
	return skb->len;
}
void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
	__smc_lgr_terminate(lgr, true);
}
static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_usable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_all(&lgr->llc_msg_waiter);
	wake_up_all(&lgr->llc_flow_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);
static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}
static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}
/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				break;
		}
		if (i == SMC_LINKS_PER_LGR_MAX)
			break;
	}
	return link_id;
}
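
/* Note (added for clarity): link_id is an 8-bit value, zero is reserved, and
 * the loop above retries until an id is found that is not yet used by any
 * usable link of this link group.
 */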
static void smcr_copy_dev_info_to_link(struct smc_link *link)
{
	struct smc_ib_device *smcibdev = link->smcibdev;

	snprintf(link->ibname, sizeof(link->ibname), "%s",
		 smcibdev->ibdev->name);
	link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
}
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	smc_ibdev_cnt_inc(lnk);
	smcr_copy_dev_info_to_link(lnk);
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	atomic_set(&lnk->conn_cnt, 0);
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	lnk->state = SMC_LNK_ACTIVATING;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	smc_ibdev_cnt_dec(lnk);
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
				     ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
				     SMC_LGR_ID_SIZE, &lgr->id);
	if (!lgr->tx_wq) {
		rc = -ENOMEM;
		goto free_lgr;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev[ini->ism_selected]->dev);
		lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
		lgr->smcd = ini->ism_dev[ini->ism_selected];
		lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->smc_version = ini->smcd_version;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_wq;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add_tail(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_wq:
	destroy_workqueue(lgr->tx_wq);
free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}
static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}
static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
			     struct smc_wr_buf *wr_buf)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
		if (!rc) {
			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	} else {
		smc_wr_tx_put_slot(conn->lnk,
				   (struct smc_wr_tx_pend_priv *)pend);
	}
	return rc;
}
static void smc_switch_link_and_count(struct smc_connection *conn,
				      struct smc_link *to_lnk)
{
	atomic_dec(&conn->lnk->conn_cnt);
	conn->lnk = to_lnk;
	atomic_inc(&conn->lnk->conn_cnt);
}
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_cdc_tx_pend *pend;
	struct smc_connection *conn;
	struct smc_wr_buf *wr_buf;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			smc_switch_link_and_count(conn, to_lnk);
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* pre-fetch buffer outside of send_lock, might sleep */
		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		smc_switch_link_and_count(conn, to_lnk);
		rc = smc_switch_cursor(smc, pend, wr_buf);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	return to_lnk;
}
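
/* Switching procedure in short (added for clarity): pick another active link
 * (skipping the failed device/port when is_dev_err is set), then walk the
 * connection rbtree.  Connections in closing states are simply moved; all
 * others additionally get their cursors rewound via smc_switch_cursor() and
 * a validation CDC message, and the tree walk is restarted after the
 * conns_lock was dropped.
 */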
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}
static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}
/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}
/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}
static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	smc_ibdev_cnt_dec(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}
static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}
static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	destroy_workqueue(lgr->tx_wq);
	if (lgr->is_smcd) {
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}
static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}
static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}
/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}
/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}
/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}
/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}
/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}
/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}
/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}
/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}
/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	smc_conn_kill(conn, true);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		struct smc_link *link;

		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;

		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (link)
			smc_llc_add_link_local(link);
	}
}
/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_timeout(lgr->llc_flow_waiter,
					   (list_empty(&lgr->list) ||
					    lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
					   SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		if (!list_empty(&lgr->list)) {
			smc_llc_send_delete_link(to_lnk, del_link_id,
						 SMC_LLC_REQ, true,
						 SMC_LLC_DEL_LOST_PATH);
			smcr_link_clear(lnk, true);
		}
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}
/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}
static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_all(&lgr->llc_msg_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}
/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}
static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
				  &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
				  &smc_lgr_list.lock;
	ini->first_contact_local = 1;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->first_contact_peer)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
				    ini->ism_peer_gid[ini->ism_selected]) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    (ini->smcd_version == SMC_V2 ||
		     lgr->vlan_id == ini->vlan_id) &&
		    (role == SMC_CLNT || ini->is_smcd ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->first_contact_local = 0;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->first_contact_peer &&
	    ini->first_contact_local) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->first_contact_local) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	} else {
		conn->rx_off = 0;
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}
/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}
/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}
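
/* Example (added for clarity): a request of 40000 bytes gives
 * (40000 - 1) >> 14 = 2 and ilog2(2) + 1 = 2, and smc_uncompress_bufsize(2)
 * yields 1 << 16 = 64KB, so the application gets at least the size it asked
 * for; exact powers of two such as 32KB or 64KB map back to themselves.
 */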
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}
/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}
/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}
static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}
1902 int smcr_buf_map_lgr(struct smc_link
*lnk
)
1904 struct smc_link_group
*lgr
= lnk
->lgr
;
1907 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
1908 rc
= _smcr_buf_map_lgr(lnk
, &lgr
->rmbs_lock
,
1909 &lgr
->rmbs
[i
], true);
1912 rc
= _smcr_buf_map_lgr(lnk
, &lgr
->sndbufs_lock
,
1913 &lgr
->sndbufs
[i
], false);
/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}
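
/* Note (added for clarity): the __GFP_NOWARN | __GFP_NORETRY |
 * __GFP_NOMEMALLOC flags make a failing high-order allocation return quickly
 * (reported as -EAGAIN here), so __smc_buf_create() can fall back to the next
 * smaller buffer size instead of putting pressure on the page allocator.
 */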
/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}
#define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			if (rc == -ENOMEM)
				return ERR_PTR(-EAGAIN);
			if (rc == -ENOSPC)
				return ERR_PTR(-ENOSPC);
			return ERR_PTR(-EIO);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {
		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return PTR_ERR(buf_desc);

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}
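
/* Note (added for clarity): the loop above starts at the compressed size
 * derived from the socket buffer size and counts downwards.  For each size a
 * reusable slot of the link group is tried first, then a new buffer is
 * allocated; -EAGAIN means "retry with a smaller size", while -ENOMEM aborts
 * the search.
 */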
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}
/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc) {
		mutex_lock(&smc->conn.lgr->sndbufs_lock);
		list_del(&smc->conn.sndbuf_desc->list);
		mutex_unlock(&smc->conn.lgr->sndbufs_lock);
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
		smc->conn.sndbuf_desc = NULL;
	}
	return rc;
}
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}
*lgr
, int lnk_idx
,
2200 for (i
= 0; i
< SMC_RMBS_PER_LGR_MAX
; i
++) {
2201 if (test_bit(i
, lgr
->rtokens_used_mask
) &&
2202 lgr
->rtokens
[i
][lnk_idx
].rkey
== rkey
)
2208 /* set rtoken for a new link to an existing rmb */
2209 void smc_rtoken_set(struct smc_link_group
*lgr
, int link_idx
, int link_idx_new
,
2210 __be32 nw_rkey_known
, __be64 nw_vaddr
, __be32 nw_rkey
)
2214 rtok_idx
= smc_rtoken_find_by_link(lgr
, link_idx
, ntohl(nw_rkey_known
));
2215 if (rtok_idx
== -ENOENT
)
2217 lgr
->rtokens
[rtok_idx
][link_idx_new
].rkey
= ntohl(nw_rkey
);
2218 lgr
->rtokens
[rtok_idx
][link_idx_new
].dma_addr
= be64_to_cpu(nw_vaddr
);
/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}
/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}
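
/* Note (added for clarity): rtokens[rmb_idx][link_idx] stores the peer's
 * rkey/dma_addr pair per remote RMB and per link, and rtokens_used_mask
 * tracks which rmb indexes are in use.  The index returned here is kept in
 * conn->rtoken_idx (see smc_rmb_rtoken_handling()) for later RDMA writes.
 */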
/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}
/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
					  clc->r0.rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}
static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	mutex_unlock(&smc_ib_devices.mutex);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	mutex_unlock(&smcd_dev_list.mutex);
}
/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	mutex_unlock(&smcd_dev_list.mutex);
}
static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};
int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}