1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Generic netlink support functions to configure an SMC-R PNET table
7 * Copyright IBM Corp. 2016
9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <linux/mutex.h>
16 #include <net/netlink.h>
17 #include <net/genetlink.h>
19 #include <uapi/linux/if.h>
20 #include <uapi/linux/smc.h>
22 #include <rdma/ib_verbs.h>
24 #include <net/netns/generic.h>
25 #include "smc_netns.h"
32 static struct net_device
*__pnet_find_base_ndev(struct net_device
*ndev
);
33 static struct net_device
*pnet_find_base_ndev(struct net_device
*ndev
);
35 static const struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
37 .type
= NLA_NUL_STRING
,
38 .len
= SMC_MAX_PNETID_LEN
40 [SMC_PNETID_ETHNAME
] = {
41 .type
= NLA_NUL_STRING
,
44 [SMC_PNETID_IBNAME
] = {
45 .type
= NLA_NUL_STRING
,
46 .len
= IB_DEVICE_NAME_MAX
- 1
48 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
51 static struct genl_family smc_pnet_nl_family
;
53 enum smc_pnet_nametype
{
58 /* pnet entry stored in pnet table */
59 struct smc_pnetentry
{
60 struct list_head list
;
61 char pnet_name
[SMC_MAX_PNETID_LEN
+ 1];
62 enum smc_pnet_nametype type
;
65 char eth_name
[IFNAMSIZ
+ 1];
66 struct net_device
*ndev
;
69 char ib_name
[IB_DEVICE_NAME_MAX
+ 1];
75 /* Check if the pnetid is set */
76 bool smc_pnet_is_pnetid_set(u8
*pnetid
)
78 if (pnetid
[0] == 0 || pnetid
[0] == _S
)
83 /* Check if two given pnetids match */
84 static bool smc_pnet_match(u8
*pnetid1
, u8
*pnetid2
)
88 for (i
= 0; i
< SMC_MAX_PNETID_LEN
; i
++) {
89 if ((pnetid1
[i
] == 0 || pnetid1
[i
] == _S
) &&
90 (pnetid2
[i
] == 0 || pnetid2
[i
] == _S
))
92 if (pnetid1
[i
] != pnetid2
[i
])
98 /* Remove a pnetid from the pnet table.
100 static int smc_pnet_remove_by_pnetid(struct net
*net
, char *pnet_name
)
102 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
103 struct smc_pnettable
*pnettable
;
104 struct smc_ib_device
*ibdev
;
105 struct smcd_dev
*smcd_dev
;
110 /* get pnettable for namespace */
111 sn
= net_generic(net
, smc_net_id
);
112 pnettable
= &sn
->pnettable
;
114 /* remove table entry */
115 write_lock(&pnettable
->lock
);
116 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
,
119 smc_pnet_match(pnetelem
->pnet_name
, pnet_name
)) {
120 list_del(&pnetelem
->list
);
121 if (pnetelem
->type
== SMC_PNET_ETH
&& pnetelem
->ndev
) {
122 dev_put(pnetelem
->ndev
);
123 pr_warn_ratelimited("smc: net device %s "
124 "erased user defined "
127 pnetelem
->pnet_name
);
133 write_unlock(&pnettable
->lock
);
135 /* if this is not the initial namespace, stop here */
136 if (net
!= &init_net
)
139 /* remove ib devices */
140 mutex_lock(&smc_ib_devices
.mutex
);
141 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
142 for (ibport
= 0; ibport
< SMC_MAX_PORTS
; ibport
++) {
143 if (ibdev
->pnetid_by_user
[ibport
] &&
145 smc_pnet_match(pnet_name
,
146 ibdev
->pnetid
[ibport
]))) {
147 pr_warn_ratelimited("smc: ib device %s ibport "
148 "%d erased user defined "
152 ibdev
->pnetid
[ibport
]);
153 memset(ibdev
->pnetid
[ibport
], 0,
155 ibdev
->pnetid_by_user
[ibport
] = false;
160 mutex_unlock(&smc_ib_devices
.mutex
);
161 /* remove smcd devices */
162 mutex_lock(&smcd_dev_list
.mutex
);
163 list_for_each_entry(smcd_dev
, &smcd_dev_list
.list
, list
) {
164 if (smcd_dev
->pnetid_by_user
&&
166 smc_pnet_match(pnet_name
, smcd_dev
->pnetid
))) {
167 pr_warn_ratelimited("smc: smcd device %s "
168 "erased user defined pnetid "
169 "%.16s\n", dev_name(&smcd_dev
->dev
),
171 memset(smcd_dev
->pnetid
, 0, SMC_MAX_PNETID_LEN
);
172 smcd_dev
->pnetid_by_user
= false;
176 mutex_unlock(&smcd_dev_list
.mutex
);
180 /* Add the reference to a given network device to the pnet table.
182 static int smc_pnet_add_by_ndev(struct net_device
*ndev
)
184 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
185 struct smc_pnettable
*pnettable
;
186 struct net
*net
= dev_net(ndev
);
190 /* get pnettable for namespace */
191 sn
= net_generic(net
, smc_net_id
);
192 pnettable
= &sn
->pnettable
;
194 write_lock(&pnettable
->lock
);
195 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
, list
) {
196 if (pnetelem
->type
== SMC_PNET_ETH
&& !pnetelem
->ndev
&&
197 !strncmp(pnetelem
->eth_name
, ndev
->name
, IFNAMSIZ
)) {
199 pnetelem
->ndev
= ndev
;
201 pr_warn_ratelimited("smc: adding net device %s with "
202 "user defined pnetid %.16s\n",
204 pnetelem
->pnet_name
);
208 write_unlock(&pnettable
->lock
);
212 /* Remove the reference to a given network device from the pnet table.
214 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
216 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
217 struct smc_pnettable
*pnettable
;
218 struct net
*net
= dev_net(ndev
);
222 /* get pnettable for namespace */
223 sn
= net_generic(net
, smc_net_id
);
224 pnettable
= &sn
->pnettable
;
226 write_lock(&pnettable
->lock
);
227 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
, list
) {
228 if (pnetelem
->type
== SMC_PNET_ETH
&& pnetelem
->ndev
== ndev
) {
229 dev_put(pnetelem
->ndev
);
230 pnetelem
->ndev
= NULL
;
232 pr_warn_ratelimited("smc: removing net device %s with "
233 "user defined pnetid %.16s\n",
235 pnetelem
->pnet_name
);
239 write_unlock(&pnettable
->lock
);
243 /* Apply pnetid to ib device when no pnetid is set.
245 static bool smc_pnet_apply_ib(struct smc_ib_device
*ib_dev
, u8 ib_port
,
248 bool applied
= false;
250 mutex_lock(&smc_ib_devices
.mutex
);
251 if (!smc_pnet_is_pnetid_set(ib_dev
->pnetid
[ib_port
- 1])) {
252 memcpy(ib_dev
->pnetid
[ib_port
- 1], pnet_name
,
254 ib_dev
->pnetid_by_user
[ib_port
- 1] = true;
257 mutex_unlock(&smc_ib_devices
.mutex
);
261 /* Apply pnetid to smcd device when no pnetid is set.
263 static bool smc_pnet_apply_smcd(struct smcd_dev
*smcd_dev
, char *pnet_name
)
265 bool applied
= false;
267 mutex_lock(&smcd_dev_list
.mutex
);
268 if (!smc_pnet_is_pnetid_set(smcd_dev
->pnetid
)) {
269 memcpy(smcd_dev
->pnetid
, pnet_name
, SMC_MAX_PNETID_LEN
);
270 smcd_dev
->pnetid_by_user
= true;
273 mutex_unlock(&smcd_dev_list
.mutex
);
277 /* The limit for pnetid is 16 characters.
278 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
279 * Lower case letters are converted to upper case.
280 * Interior blanks should not be used.
282 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
284 char *bf
= skip_spaces(pnet_name
);
285 size_t len
= strlen(bf
);
286 char *end
= bf
+ len
;
290 while (--end
>= bf
&& isspace(*end
))
292 if (end
- bf
>= SMC_MAX_PNETID_LEN
)
297 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
304 /* Find an infiniband device by a given name. The device might not exist. */
305 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
307 struct smc_ib_device
*ibdev
;
309 mutex_lock(&smc_ib_devices
.mutex
);
310 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
311 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
312 sizeof(ibdev
->ibdev
->name
)) ||
313 !strncmp(dev_name(ibdev
->ibdev
->dev
.parent
), ib_name
,
314 IB_DEVICE_NAME_MAX
- 1)) {
320 mutex_unlock(&smc_ib_devices
.mutex
);
324 /* Find an smcd device by a given name. The device might not exist. */
325 static struct smcd_dev
*smc_pnet_find_smcd(char *smcd_name
)
327 struct smcd_dev
*smcd_dev
;
329 mutex_lock(&smcd_dev_list
.mutex
);
330 list_for_each_entry(smcd_dev
, &smcd_dev_list
.list
, list
) {
331 if (!strncmp(dev_name(&smcd_dev
->dev
), smcd_name
,
332 IB_DEVICE_NAME_MAX
- 1))
337 mutex_unlock(&smcd_dev_list
.mutex
);
341 static int smc_pnet_add_eth(struct smc_pnettable
*pnettable
, struct net
*net
,
342 char *eth_name
, char *pnet_name
)
344 struct smc_pnetentry
*tmp_pe
, *new_pe
;
345 struct net_device
*ndev
, *base_ndev
;
346 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
350 /* check if (base) netdev already has a pnetid. If there is one, we do
351 * not want to add a pnet table entry
354 ndev
= dev_get_by_name(net
, eth_name
); /* dev_hold() */
356 base_ndev
= pnet_find_base_ndev(ndev
);
357 if (!smc_pnetid_by_dev_port(base_ndev
->dev
.parent
,
358 base_ndev
->dev_port
, ndev_pnetid
))
362 /* add a new netdev entry to the pnet table if there isn't one */
364 new_pe
= kzalloc(sizeof(*new_pe
), GFP_KERNEL
);
367 new_pe
->type
= SMC_PNET_ETH
;
368 memcpy(new_pe
->pnet_name
, pnet_name
, SMC_MAX_PNETID_LEN
);
369 strncpy(new_pe
->eth_name
, eth_name
, IFNAMSIZ
);
374 write_lock(&pnettable
->lock
);
375 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
376 if (tmp_pe
->type
== SMC_PNET_ETH
&&
377 !strncmp(tmp_pe
->eth_name
, eth_name
, IFNAMSIZ
)) {
383 list_add_tail(&new_pe
->list
, &pnettable
->pnetlist
);
384 write_unlock(&pnettable
->lock
);
386 write_unlock(&pnettable
->lock
);
391 pr_warn_ratelimited("smc: net device %s "
392 "applied user defined pnetid %.16s\n",
393 new_pe
->eth_name
, new_pe
->pnet_name
);
402 static int smc_pnet_add_ib(struct smc_pnettable
*pnettable
, char *ib_name
,
403 u8 ib_port
, char *pnet_name
)
405 struct smc_pnetentry
*tmp_pe
, *new_pe
;
406 struct smc_ib_device
*ib_dev
;
407 bool smcddev_applied
= true;
408 bool ibdev_applied
= true;
409 struct smcd_dev
*smcd_dev
;
412 /* try to apply the pnetid to active devices */
413 ib_dev
= smc_pnet_find_ib(ib_name
);
415 ibdev_applied
= smc_pnet_apply_ib(ib_dev
, ib_port
, pnet_name
);
417 pr_warn_ratelimited("smc: ib device %s ibport %d "
418 "applied user defined pnetid "
419 "%.16s\n", ib_dev
->ibdev
->name
,
421 ib_dev
->pnetid
[ib_port
- 1]);
423 smcd_dev
= smc_pnet_find_smcd(ib_name
);
425 smcddev_applied
= smc_pnet_apply_smcd(smcd_dev
, pnet_name
);
427 pr_warn_ratelimited("smc: smcd device %s "
428 "applied user defined pnetid "
429 "%.16s\n", dev_name(&smcd_dev
->dev
),
432 /* Apply fails when a device has a hardware-defined pnetid set, do not
433 * add a pnet table entry in that case.
435 if (!ibdev_applied
|| !smcddev_applied
)
438 /* add a new ib entry to the pnet table if there isn't one */
439 new_pe
= kzalloc(sizeof(*new_pe
), GFP_KERNEL
);
442 new_pe
->type
= SMC_PNET_IB
;
443 memcpy(new_pe
->pnet_name
, pnet_name
, SMC_MAX_PNETID_LEN
);
444 strncpy(new_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
);
445 new_pe
->ib_port
= ib_port
;
448 write_lock(&pnettable
->lock
);
449 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
450 if (tmp_pe
->type
== SMC_PNET_IB
&&
451 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
)) {
457 list_add_tail(&new_pe
->list
, &pnettable
->pnetlist
);
458 write_unlock(&pnettable
->lock
);
460 write_unlock(&pnettable
->lock
);
463 return (new_ibdev
) ? 0 : -EEXIST
;
466 /* Append a pnetid to the end of the pnet table if not already on this list.
468 static int smc_pnet_enter(struct net
*net
, struct nlattr
*tb
[])
470 char pnet_name
[SMC_MAX_PNETID_LEN
+ 1];
471 struct smc_pnettable
*pnettable
;
472 bool new_netdev
= false;
473 bool new_ibdev
= false;
479 /* get pnettable for namespace */
480 sn
= net_generic(net
, smc_net_id
);
481 pnettable
= &sn
->pnettable
;
484 if (!tb
[SMC_PNETID_NAME
])
486 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
487 if (!smc_pnetid_valid(string
, pnet_name
))
490 if (tb
[SMC_PNETID_ETHNAME
]) {
491 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
492 rc
= smc_pnet_add_eth(pnettable
, net
, string
, pnet_name
);
495 else if (rc
!= -EEXIST
)
499 /* if this is not the initial namespace, stop here */
500 if (net
!= &init_net
)
501 return new_netdev
? 0 : -EEXIST
;
504 if (tb
[SMC_PNETID_IBNAME
]) {
505 string
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
506 string
= strim(string
);
507 if (tb
[SMC_PNETID_IBPORT
]) {
508 ibport
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
509 if (ibport
< 1 || ibport
> SMC_MAX_PORTS
)
512 rc
= smc_pnet_add_ib(pnettable
, string
, ibport
, pnet_name
);
515 else if (rc
!= -EEXIST
)
518 return (new_netdev
|| new_ibdev
) ? 0 : -EEXIST
;
524 /* Convert an smc_pnetentry to a netlink attribute sequence */
525 static int smc_pnet_set_nla(struct sk_buff
*msg
,
526 struct smc_pnetentry
*pnetelem
)
528 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
))
530 if (pnetelem
->type
== SMC_PNET_ETH
) {
531 if (nla_put_string(msg
, SMC_PNETID_ETHNAME
,
535 if (nla_put_string(msg
, SMC_PNETID_ETHNAME
, "n/a"))
538 if (pnetelem
->type
== SMC_PNET_IB
) {
539 if (nla_put_string(msg
, SMC_PNETID_IBNAME
, pnetelem
->ib_name
) ||
540 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
543 if (nla_put_string(msg
, SMC_PNETID_IBNAME
, "n/a") ||
544 nla_put_u8(msg
, SMC_PNETID_IBPORT
, 0xff))
551 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
553 struct net
*net
= genl_info_net(info
);
555 return smc_pnet_enter(net
, info
->attrs
);
558 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
560 struct net
*net
= genl_info_net(info
);
562 if (!info
->attrs
[SMC_PNETID_NAME
])
564 return smc_pnet_remove_by_pnetid(net
,
565 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
568 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
574 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
575 u32 portid
, u32 seq
, u32 flags
,
576 struct smc_pnetentry
*pnetelem
)
580 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
581 flags
, SMC_PNETID_GET
);
584 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
585 genlmsg_cancel(skb
, hdr
);
588 genlmsg_end(skb
, hdr
);
592 static int _smc_pnet_dump(struct net
*net
, struct sk_buff
*skb
, u32 portid
,
593 u32 seq
, u8
*pnetid
, int start_idx
)
595 struct smc_pnettable
*pnettable
;
596 struct smc_pnetentry
*pnetelem
;
600 /* get pnettable for namespace */
601 sn
= net_generic(net
, smc_net_id
);
602 pnettable
= &sn
->pnettable
;
604 /* dump pnettable entries */
605 read_lock(&pnettable
->lock
);
606 list_for_each_entry(pnetelem
, &pnettable
->pnetlist
, list
) {
607 if (pnetid
&& !smc_pnet_match(pnetelem
->pnet_name
, pnetid
))
609 if (idx
++ < start_idx
)
611 /* if this is not the initial namespace, dump only netdev */
612 if (net
!= &init_net
&& pnetelem
->type
!= SMC_PNET_ETH
)
614 if (smc_pnet_dumpinfo(skb
, portid
, seq
, NLM_F_MULTI
,
620 read_unlock(&pnettable
->lock
);
624 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
626 struct net
*net
= sock_net(skb
->sk
);
629 idx
= _smc_pnet_dump(net
, skb
, NETLINK_CB(cb
->skb
).portid
,
630 cb
->nlh
->nlmsg_seq
, NULL
, cb
->args
[0]);
636 /* Retrieve one PNETID entry */
637 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
639 struct net
*net
= genl_info_net(info
);
643 if (!info
->attrs
[SMC_PNETID_NAME
])
646 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
650 _smc_pnet_dump(net
, msg
, info
->snd_portid
, info
->snd_seq
,
651 nla_data(info
->attrs
[SMC_PNETID_NAME
]), 0);
653 /* finish multi part message and send it */
654 hdr
= nlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
, NLMSG_DONE
, 0,
660 return genlmsg_reply(msg
, info
);
663 /* Remove and delete all pnetids from pnet table.
665 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
667 struct net
*net
= genl_info_net(info
);
669 smc_pnet_remove_by_pnetid(net
, NULL
);
673 /* SMC_PNETID generic netlink operation definition */
674 static const struct genl_ops smc_pnet_ops
[] = {
676 .cmd
= SMC_PNETID_GET
,
677 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
678 /* can be retrieved by unprivileged users */
679 .doit
= smc_pnet_get
,
680 .dumpit
= smc_pnet_dump
,
681 .start
= smc_pnet_dump_start
684 .cmd
= SMC_PNETID_ADD
,
685 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
686 .flags
= GENL_ADMIN_PERM
,
690 .cmd
= SMC_PNETID_DEL
,
691 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
692 .flags
= GENL_ADMIN_PERM
,
696 .cmd
= SMC_PNETID_FLUSH
,
697 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
698 .flags
= GENL_ADMIN_PERM
,
699 .doit
= smc_pnet_flush
703 /* SMC_PNETID family definition */
704 static struct genl_family smc_pnet_nl_family __ro_after_init
= {
706 .name
= SMCR_GENL_FAMILY_NAME
,
707 .version
= SMCR_GENL_FAMILY_VERSION
,
708 .maxattr
= SMC_PNETID_MAX
,
709 .policy
= smc_pnet_policy
,
711 .module
= THIS_MODULE
,
713 .n_ops
= ARRAY_SIZE(smc_pnet_ops
)
716 bool smc_pnet_is_ndev_pnetid(struct net
*net
, u8
*pnetid
)
718 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
719 struct smc_pnetids_ndev_entry
*pe
;
722 read_lock(&sn
->pnetids_ndev
.lock
);
723 list_for_each_entry(pe
, &sn
->pnetids_ndev
.list
, list
) {
724 if (smc_pnet_match(pnetid
, pe
->pnetid
)) {
731 read_unlock(&sn
->pnetids_ndev
.lock
);
735 static int smc_pnet_add_pnetid(struct net
*net
, u8
*pnetid
)
737 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
738 struct smc_pnetids_ndev_entry
*pe
, *pi
;
740 pe
= kzalloc(sizeof(*pe
), GFP_KERNEL
);
744 write_lock(&sn
->pnetids_ndev
.lock
);
745 list_for_each_entry(pi
, &sn
->pnetids_ndev
.list
, list
) {
746 if (smc_pnet_match(pnetid
, pe
->pnetid
)) {
747 refcount_inc(&pi
->refcnt
);
752 refcount_set(&pe
->refcnt
, 1);
753 memcpy(pe
->pnetid
, pnetid
, SMC_MAX_PNETID_LEN
);
754 list_add_tail(&pe
->list
, &sn
->pnetids_ndev
.list
);
757 write_unlock(&sn
->pnetids_ndev
.lock
);
761 static void smc_pnet_remove_pnetid(struct net
*net
, u8
*pnetid
)
763 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
764 struct smc_pnetids_ndev_entry
*pe
, *pe2
;
766 write_lock(&sn
->pnetids_ndev
.lock
);
767 list_for_each_entry_safe(pe
, pe2
, &sn
->pnetids_ndev
.list
, list
) {
768 if (smc_pnet_match(pnetid
, pe
->pnetid
)) {
769 if (refcount_dec_and_test(&pe
->refcnt
)) {
776 write_unlock(&sn
->pnetids_ndev
.lock
);
779 static void smc_pnet_add_base_pnetid(struct net
*net
, struct net_device
*dev
,
782 struct net_device
*base_dev
;
784 base_dev
= __pnet_find_base_ndev(dev
);
785 if (base_dev
->flags
& IFF_UP
&&
786 !smc_pnetid_by_dev_port(base_dev
->dev
.parent
, base_dev
->dev_port
,
788 /* add to PNETIDs list */
789 smc_pnet_add_pnetid(net
, ndev_pnetid
);
793 /* create initial list of netdevice pnetids */
794 static void smc_pnet_create_pnetids_list(struct net
*net
)
796 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
797 struct net_device
*dev
;
800 for_each_netdev(net
, dev
)
801 smc_pnet_add_base_pnetid(net
, dev
, ndev_pnetid
);
805 /* clean up list of netdevice pnetids */
806 static void smc_pnet_destroy_pnetids_list(struct net
*net
)
808 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
809 struct smc_pnetids_ndev_entry
*pe
, *temp_pe
;
811 write_lock(&sn
->pnetids_ndev
.lock
);
812 list_for_each_entry_safe(pe
, temp_pe
, &sn
->pnetids_ndev
.list
, list
) {
816 write_unlock(&sn
->pnetids_ndev
.lock
);
819 static int smc_pnet_netdev_event(struct notifier_block
*this,
820 unsigned long event
, void *ptr
)
822 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
823 struct net
*net
= dev_net(event_dev
);
824 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
828 case NETDEV_UNREGISTER
:
829 smc_pnet_remove_by_ndev(event_dev
);
830 smc_ib_ndev_change(event_dev
, event
);
832 case NETDEV_REGISTER
:
833 smc_pnet_add_by_ndev(event_dev
);
834 smc_ib_ndev_change(event_dev
, event
);
837 smc_pnet_add_base_pnetid(net
, event_dev
, ndev_pnetid
);
840 event_dev
= __pnet_find_base_ndev(event_dev
);
841 if (!smc_pnetid_by_dev_port(event_dev
->dev
.parent
,
842 event_dev
->dev_port
, ndev_pnetid
)) {
843 /* remove from PNETIDs list */
844 smc_pnet_remove_pnetid(net
, ndev_pnetid
);
852 static struct notifier_block smc_netdev_notifier
= {
853 .notifier_call
= smc_pnet_netdev_event
856 /* init network namespace */
857 int smc_pnet_net_init(struct net
*net
)
859 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
860 struct smc_pnettable
*pnettable
= &sn
->pnettable
;
861 struct smc_pnetids_ndev
*pnetids_ndev
= &sn
->pnetids_ndev
;
863 INIT_LIST_HEAD(&pnettable
->pnetlist
);
864 rwlock_init(&pnettable
->lock
);
865 INIT_LIST_HEAD(&pnetids_ndev
->list
);
866 rwlock_init(&pnetids_ndev
->lock
);
868 smc_pnet_create_pnetids_list(net
);
873 int __init
smc_pnet_init(void)
877 rc
= genl_register_family(&smc_pnet_nl_family
);
880 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
882 genl_unregister_family(&smc_pnet_nl_family
);
887 /* exit network namespace */
888 void smc_pnet_net_exit(struct net
*net
)
890 /* flush pnet table */
891 smc_pnet_remove_by_pnetid(net
, NULL
);
892 smc_pnet_destroy_pnetids_list(net
);
895 void smc_pnet_exit(void)
897 unregister_netdevice_notifier(&smc_netdev_notifier
);
898 genl_unregister_family(&smc_pnet_nl_family
);
901 static struct net_device
*__pnet_find_base_ndev(struct net_device
*ndev
)
906 nest_lvl
= ndev
->lower_level
;
907 for (i
= 0; i
< nest_lvl
; i
++) {
908 struct list_head
*lower
= &ndev
->adj_list
.lower
;
910 if (list_empty(lower
))
913 ndev
= netdev_lower_get_next(ndev
, &lower
);
918 /* Determine one base device for stacked net devices.
919 * If the lower device level contains more than one devices
920 * (for instance with bonding slaves), just the first device
921 * is used to reach a base device.
923 static struct net_device
*pnet_find_base_ndev(struct net_device
*ndev
)
926 ndev
= __pnet_find_base_ndev(ndev
);
931 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device
*ndev
,
934 struct smc_pnettable
*pnettable
;
935 struct net
*net
= dev_net(ndev
);
936 struct smc_pnetentry
*pnetelem
;
940 /* get pnettable for namespace */
941 sn
= net_generic(net
, smc_net_id
);
942 pnettable
= &sn
->pnettable
;
944 read_lock(&pnettable
->lock
);
945 list_for_each_entry(pnetelem
, &pnettable
->pnetlist
, list
) {
946 if (pnetelem
->type
== SMC_PNET_ETH
&& ndev
== pnetelem
->ndev
) {
947 /* get pnetid of netdev device */
948 memcpy(pnetid
, pnetelem
->pnet_name
, SMC_MAX_PNETID_LEN
);
953 read_unlock(&pnettable
->lock
);
957 /* find a roce device for the given pnetid */
958 static void _smc_pnet_find_roce_by_pnetid(u8
*pnet_id
,
959 struct smc_init_info
*ini
,
960 struct smc_ib_device
*known_dev
)
962 struct smc_ib_device
*ibdev
;
966 mutex_lock(&smc_ib_devices
.mutex
);
967 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
968 if (ibdev
== known_dev
)
970 for (i
= 1; i
<= SMC_MAX_PORTS
; i
++) {
971 if (!rdma_is_port_valid(ibdev
->ibdev
, i
))
973 if (smc_pnet_match(ibdev
->pnetid
[i
- 1], pnet_id
) &&
974 smc_ib_port_active(ibdev
, i
) &&
975 !test_bit(i
- 1, ibdev
->ports_going_away
) &&
976 !smc_ib_determine_gid(ibdev
, i
, ini
->vlan_id
,
977 ini
->ib_gid
, NULL
)) {
985 mutex_unlock(&smc_ib_devices
.mutex
);
988 /* find alternate roce device with same pnet_id and vlan_id */
989 void smc_pnet_find_alt_roce(struct smc_link_group
*lgr
,
990 struct smc_init_info
*ini
,
991 struct smc_ib_device
*known_dev
)
993 _smc_pnet_find_roce_by_pnetid(lgr
->pnet_id
, ini
, known_dev
);
996 /* if handshake network device belongs to a roce device, return its
999 static void smc_pnet_find_rdma_dev(struct net_device
*netdev
,
1000 struct smc_init_info
*ini
)
1002 struct smc_ib_device
*ibdev
;
1004 mutex_lock(&smc_ib_devices
.mutex
);
1005 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
1006 struct net_device
*ndev
;
1009 for (i
= 1; i
<= SMC_MAX_PORTS
; i
++) {
1010 if (!rdma_is_port_valid(ibdev
->ibdev
, i
))
1012 if (!ibdev
->ibdev
->ops
.get_netdev
)
1014 ndev
= ibdev
->ibdev
->ops
.get_netdev(ibdev
->ibdev
, i
);
1018 if (netdev
== ndev
&&
1019 smc_ib_port_active(ibdev
, i
) &&
1020 !test_bit(i
- 1, ibdev
->ports_going_away
) &&
1021 !smc_ib_determine_gid(ibdev
, i
, ini
->vlan_id
,
1022 ini
->ib_gid
, NULL
)) {
1023 ini
->ib_dev
= ibdev
;
1029 mutex_unlock(&smc_ib_devices
.mutex
);
1032 /* Determine the corresponding IB device port based on the hardware PNETID.
1033 * Searching stops at the first matching active IB device port with vlan_id
1035 * If nothing found, check pnetid table.
1036 * If nothing found, try to use handshake device
1038 static void smc_pnet_find_roce_by_pnetid(struct net_device
*ndev
,
1039 struct smc_init_info
*ini
)
1041 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
1043 ndev
= pnet_find_base_ndev(ndev
);
1044 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
1046 smc_pnet_find_ndev_pnetid_by_table(ndev
, ndev_pnetid
)) {
1047 smc_pnet_find_rdma_dev(ndev
, ini
);
1048 return; /* pnetid could not be determined */
1050 _smc_pnet_find_roce_by_pnetid(ndev_pnetid
, ini
, NULL
);
1053 static void smc_pnet_find_ism_by_pnetid(struct net_device
*ndev
,
1054 struct smc_init_info
*ini
)
1056 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
1057 struct smcd_dev
*ismdev
;
1059 ndev
= pnet_find_base_ndev(ndev
);
1060 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
1062 smc_pnet_find_ndev_pnetid_by_table(ndev
, ndev_pnetid
))
1063 return; /* pnetid could not be determined */
1065 mutex_lock(&smcd_dev_list
.mutex
);
1066 list_for_each_entry(ismdev
, &smcd_dev_list
.list
, list
) {
1067 if (smc_pnet_match(ismdev
->pnetid
, ndev_pnetid
) &&
1068 !ismdev
->going_away
&&
1069 (!ini
->ism_peer_gid
[0] ||
1070 !smc_ism_cantalk(ini
->ism_peer_gid
[0], ini
->vlan_id
,
1072 ini
->ism_dev
[0] = ismdev
;
1076 mutex_unlock(&smcd_dev_list
.mutex
);
1079 /* PNET table analysis for a given sock:
1080 * determine ib_device and port belonging to used internal TCP socket
1081 * ethernet interface.
1083 void smc_pnet_find_roce_resource(struct sock
*sk
, struct smc_init_info
*ini
)
1085 struct dst_entry
*dst
= sk_dst_get(sk
);
1094 smc_pnet_find_roce_by_pnetid(dst
->dev
, ini
);
1102 void smc_pnet_find_ism_resource(struct sock
*sk
, struct smc_init_info
*ini
)
1104 struct dst_entry
*dst
= sk_dst_get(sk
);
1106 ini
->ism_dev
[0] = NULL
;
1112 smc_pnet_find_ism_by_pnetid(dst
->dev
, ini
);
1120 /* Lookup and apply a pnet table entry to the given ib device.
1122 int smc_pnetid_by_table_ib(struct smc_ib_device
*smcibdev
, u8 ib_port
)
1124 char *ib_name
= smcibdev
->ibdev
->name
;
1125 struct smc_pnettable
*pnettable
;
1126 struct smc_pnetentry
*tmp_pe
;
1130 /* get pnettable for init namespace */
1131 sn
= net_generic(&init_net
, smc_net_id
);
1132 pnettable
= &sn
->pnettable
;
1134 read_lock(&pnettable
->lock
);
1135 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
1136 if (tmp_pe
->type
== SMC_PNET_IB
&&
1137 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
) &&
1138 tmp_pe
->ib_port
== ib_port
) {
1139 smc_pnet_apply_ib(smcibdev
, ib_port
, tmp_pe
->pnet_name
);
1144 read_unlock(&pnettable
->lock
);
1149 /* Lookup and apply a pnet table entry to the given smcd device.
1151 int smc_pnetid_by_table_smcd(struct smcd_dev
*smcddev
)
1153 const char *ib_name
= dev_name(&smcddev
->dev
);
1154 struct smc_pnettable
*pnettable
;
1155 struct smc_pnetentry
*tmp_pe
;
1159 /* get pnettable for init namespace */
1160 sn
= net_generic(&init_net
, smc_net_id
);
1161 pnettable
= &sn
->pnettable
;
1163 read_lock(&pnettable
->lock
);
1164 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
1165 if (tmp_pe
->type
== SMC_PNET_IB
&&
1166 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
)) {
1167 smc_pnet_apply_smcd(smcddev
, tmp_pe
->pnet_name
);
1172 read_unlock(&pnettable
->lock
);