1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Generic netlink support functions to configure an SMC-R PNET table
7 * Copyright IBM Corp. 2016
9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <linux/mutex.h>
16 #include <net/netlink.h>
17 #include <net/genetlink.h>
19 #include <uapi/linux/if.h>
20 #include <uapi/linux/smc.h>
22 #include <rdma/ib_verbs.h>
24 #include <net/netns/generic.h>
25 #include "smc_netns.h"
32 static struct net_device
*__pnet_find_base_ndev(struct net_device
*ndev
);
33 static struct net_device
*pnet_find_base_ndev(struct net_device
*ndev
);
35 static const struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
37 .type
= NLA_NUL_STRING
,
38 .len
= SMC_MAX_PNETID_LEN
40 [SMC_PNETID_ETHNAME
] = {
41 .type
= NLA_NUL_STRING
,
44 [SMC_PNETID_IBNAME
] = {
45 .type
= NLA_NUL_STRING
,
46 .len
= IB_DEVICE_NAME_MAX
- 1
48 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
51 static struct genl_family smc_pnet_nl_family
;
53 enum smc_pnet_nametype
{
58 /* pnet entry stored in pnet table */
59 struct smc_pnetentry
{
60 struct list_head list
;
61 char pnet_name
[SMC_MAX_PNETID_LEN
+ 1];
62 enum smc_pnet_nametype type
;
65 char eth_name
[IFNAMSIZ
+ 1];
66 struct net_device
*ndev
;
67 netdevice_tracker dev_tracker
;
70 char ib_name
[IB_DEVICE_NAME_MAX
+ 1];
76 /* Check if the pnetid is set */
77 bool smc_pnet_is_pnetid_set(u8
*pnetid
)
79 if (pnetid
[0] == 0 || pnetid
[0] == _S
)
84 /* Check if two given pnetids match */
85 static bool smc_pnet_match(u8
*pnetid1
, u8
*pnetid2
)
89 for (i
= 0; i
< SMC_MAX_PNETID_LEN
; i
++) {
90 if ((pnetid1
[i
] == 0 || pnetid1
[i
] == _S
) &&
91 (pnetid2
[i
] == 0 || pnetid2
[i
] == _S
))
93 if (pnetid1
[i
] != pnetid2
[i
])
99 /* Remove a pnetid from the pnet table.
101 static int smc_pnet_remove_by_pnetid(struct net
*net
, char *pnet_name
)
103 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
104 struct smc_pnettable
*pnettable
;
105 struct smc_ib_device
*ibdev
;
106 struct smcd_dev
*smcd
;
111 /* get pnettable for namespace */
112 sn
= net_generic(net
, smc_net_id
);
113 pnettable
= &sn
->pnettable
;
115 /* remove table entry */
116 mutex_lock(&pnettable
->lock
);
117 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
,
120 smc_pnet_match(pnetelem
->pnet_name
, pnet_name
)) {
121 list_del(&pnetelem
->list
);
122 if (pnetelem
->type
== SMC_PNET_ETH
&& pnetelem
->ndev
) {
123 netdev_put(pnetelem
->ndev
,
124 &pnetelem
->dev_tracker
);
125 pr_warn_ratelimited("smc: net device %s "
126 "erased user defined "
129 pnetelem
->pnet_name
);
135 mutex_unlock(&pnettable
->lock
);
137 /* if this is not the initial namespace, stop here */
138 if (net
!= &init_net
)
141 /* remove ib devices */
142 mutex_lock(&smc_ib_devices
.mutex
);
143 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
144 for (ibport
= 0; ibport
< SMC_MAX_PORTS
; ibport
++) {
145 if (ibdev
->pnetid_by_user
[ibport
] &&
147 smc_pnet_match(pnet_name
,
148 ibdev
->pnetid
[ibport
]))) {
149 pr_warn_ratelimited("smc: ib device %s ibport "
150 "%d erased user defined "
154 ibdev
->pnetid
[ibport
]);
155 memset(ibdev
->pnetid
[ibport
], 0,
157 ibdev
->pnetid_by_user
[ibport
] = false;
162 mutex_unlock(&smc_ib_devices
.mutex
);
163 /* remove smcd devices */
164 mutex_lock(&smcd_dev_list
.mutex
);
165 list_for_each_entry(smcd
, &smcd_dev_list
.list
, list
) {
166 if (smcd
->pnetid_by_user
&&
168 smc_pnet_match(pnet_name
, smcd
->pnetid
))) {
169 pr_warn_ratelimited("smc: smcd device %s "
170 "erased user defined pnetid "
172 dev_name(smcd
->ops
->get_dev(smcd
)),
174 memset(smcd
->pnetid
, 0, SMC_MAX_PNETID_LEN
);
175 smcd
->pnetid_by_user
= false;
179 mutex_unlock(&smcd_dev_list
.mutex
);
183 /* Add the reference to a given network device to the pnet table.
185 static int smc_pnet_add_by_ndev(struct net_device
*ndev
)
187 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
188 struct smc_pnettable
*pnettable
;
189 struct net
*net
= dev_net(ndev
);
193 /* get pnettable for namespace */
194 sn
= net_generic(net
, smc_net_id
);
195 pnettable
= &sn
->pnettable
;
197 mutex_lock(&pnettable
->lock
);
198 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
, list
) {
199 if (pnetelem
->type
== SMC_PNET_ETH
&& !pnetelem
->ndev
&&
200 !strncmp(pnetelem
->eth_name
, ndev
->name
, IFNAMSIZ
)) {
201 netdev_hold(ndev
, &pnetelem
->dev_tracker
, GFP_ATOMIC
);
202 pnetelem
->ndev
= ndev
;
204 pr_warn_ratelimited("smc: adding net device %s with "
205 "user defined pnetid %.16s\n",
207 pnetelem
->pnet_name
);
211 mutex_unlock(&pnettable
->lock
);
215 /* Remove the reference to a given network device from the pnet table.
217 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
219 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
220 struct smc_pnettable
*pnettable
;
221 struct net
*net
= dev_net(ndev
);
225 /* get pnettable for namespace */
226 sn
= net_generic(net
, smc_net_id
);
227 pnettable
= &sn
->pnettable
;
229 mutex_lock(&pnettable
->lock
);
230 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
, list
) {
231 if (pnetelem
->type
== SMC_PNET_ETH
&& pnetelem
->ndev
== ndev
) {
232 netdev_put(pnetelem
->ndev
, &pnetelem
->dev_tracker
);
233 pnetelem
->ndev
= NULL
;
235 pr_warn_ratelimited("smc: removing net device %s with "
236 "user defined pnetid %.16s\n",
238 pnetelem
->pnet_name
);
242 mutex_unlock(&pnettable
->lock
);
246 /* Apply pnetid to ib device when no pnetid is set.
248 static bool smc_pnet_apply_ib(struct smc_ib_device
*ib_dev
, u8 ib_port
,
251 bool applied
= false;
253 mutex_lock(&smc_ib_devices
.mutex
);
254 if (!smc_pnet_is_pnetid_set(ib_dev
->pnetid
[ib_port
- 1])) {
255 memcpy(ib_dev
->pnetid
[ib_port
- 1], pnet_name
,
257 ib_dev
->pnetid_by_user
[ib_port
- 1] = true;
260 mutex_unlock(&smc_ib_devices
.mutex
);
264 /* Apply pnetid to smcd device when no pnetid is set.
266 static bool smc_pnet_apply_smcd(struct smcd_dev
*smcd_dev
, char *pnet_name
)
268 bool applied
= false;
270 mutex_lock(&smcd_dev_list
.mutex
);
271 if (!smc_pnet_is_pnetid_set(smcd_dev
->pnetid
)) {
272 memcpy(smcd_dev
->pnetid
, pnet_name
, SMC_MAX_PNETID_LEN
);
273 smcd_dev
->pnetid_by_user
= true;
276 mutex_unlock(&smcd_dev_list
.mutex
);
280 /* The limit for pnetid is 16 characters.
281 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
282 * Lower case letters are converted to upper case.
283 * Interior blanks should not be used.
285 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
287 char *bf
= skip_spaces(pnet_name
);
288 size_t len
= strlen(bf
);
289 char *end
= bf
+ len
;
293 while (--end
>= bf
&& isspace(*end
))
295 if (end
- bf
>= SMC_MAX_PNETID_LEN
)
300 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
307 /* Find an infiniband device by a given name. The device might not exist. */
308 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
310 struct smc_ib_device
*ibdev
;
312 mutex_lock(&smc_ib_devices
.mutex
);
313 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
314 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
315 sizeof(ibdev
->ibdev
->name
)) ||
316 (ibdev
->ibdev
->dev
.parent
&&
317 !strncmp(dev_name(ibdev
->ibdev
->dev
.parent
), ib_name
,
318 IB_DEVICE_NAME_MAX
- 1))) {
324 mutex_unlock(&smc_ib_devices
.mutex
);
328 /* Find an smcd device by a given name. The device might not exist. */
329 static struct smcd_dev
*smc_pnet_find_smcd(char *smcd_name
)
331 struct smcd_dev
*smcd_dev
;
333 mutex_lock(&smcd_dev_list
.mutex
);
334 list_for_each_entry(smcd_dev
, &smcd_dev_list
.list
, list
) {
335 if (!strncmp(dev_name(smcd_dev
->ops
->get_dev(smcd_dev
)),
336 smcd_name
, IB_DEVICE_NAME_MAX
- 1))
341 mutex_unlock(&smcd_dev_list
.mutex
);
345 static int smc_pnet_add_eth(struct smc_pnettable
*pnettable
, struct net
*net
,
346 char *eth_name
, char *pnet_name
)
348 struct smc_pnetentry
*tmp_pe
, *new_pe
;
349 struct net_device
*ndev
, *base_ndev
;
350 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
354 /* check if (base) netdev already has a pnetid. If there is one, we do
355 * not want to add a pnet table entry
358 ndev
= dev_get_by_name(net
, eth_name
); /* dev_hold() */
360 base_ndev
= pnet_find_base_ndev(ndev
);
361 if (!smc_pnetid_by_dev_port(base_ndev
->dev
.parent
,
362 base_ndev
->dev_port
, ndev_pnetid
))
366 /* add a new netdev entry to the pnet table if there isn't one */
368 new_pe
= kzalloc(sizeof(*new_pe
), GFP_KERNEL
);
371 new_pe
->type
= SMC_PNET_ETH
;
372 memcpy(new_pe
->pnet_name
, pnet_name
, SMC_MAX_PNETID_LEN
);
373 strncpy(new_pe
->eth_name
, eth_name
, IFNAMSIZ
);
376 mutex_lock(&pnettable
->lock
);
377 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
378 if (tmp_pe
->type
== SMC_PNET_ETH
&&
379 !strncmp(tmp_pe
->eth_name
, eth_name
, IFNAMSIZ
)) {
387 netdev_tracker_alloc(ndev
, &new_pe
->dev_tracker
,
390 list_add_tail(&new_pe
->list
, &pnettable
->pnetlist
);
391 mutex_unlock(&pnettable
->lock
);
393 mutex_unlock(&pnettable
->lock
);
398 pr_warn_ratelimited("smc: net device %s "
399 "applied user defined pnetid %.16s\n",
400 new_pe
->eth_name
, new_pe
->pnet_name
);
408 static int smc_pnet_add_ib(struct smc_pnettable
*pnettable
, char *ib_name
,
409 u8 ib_port
, char *pnet_name
)
411 struct smc_pnetentry
*tmp_pe
, *new_pe
;
412 struct smc_ib_device
*ib_dev
;
413 bool smcddev_applied
= true;
414 bool ibdev_applied
= true;
415 struct smcd_dev
*smcd
;
419 /* try to apply the pnetid to active devices */
420 ib_dev
= smc_pnet_find_ib(ib_name
);
422 ibdev_applied
= smc_pnet_apply_ib(ib_dev
, ib_port
, pnet_name
);
424 pr_warn_ratelimited("smc: ib device %s ibport %d "
425 "applied user defined pnetid "
426 "%.16s\n", ib_dev
->ibdev
->name
,
428 ib_dev
->pnetid
[ib_port
- 1]);
430 smcd
= smc_pnet_find_smcd(ib_name
);
432 smcddev_applied
= smc_pnet_apply_smcd(smcd
, pnet_name
);
433 if (smcddev_applied
) {
434 dev
= smcd
->ops
->get_dev(smcd
);
435 pr_warn_ratelimited("smc: smcd device %s "
436 "applied user defined pnetid "
437 "%.16s\n", dev_name(dev
),
441 /* Apply fails when a device has a hardware-defined pnetid set, do not
442 * add a pnet table entry in that case.
444 if (!ibdev_applied
|| !smcddev_applied
)
447 /* add a new ib entry to the pnet table if there isn't one */
448 new_pe
= kzalloc(sizeof(*new_pe
), GFP_KERNEL
);
451 new_pe
->type
= SMC_PNET_IB
;
452 memcpy(new_pe
->pnet_name
, pnet_name
, SMC_MAX_PNETID_LEN
);
453 strncpy(new_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
);
454 new_pe
->ib_port
= ib_port
;
457 mutex_lock(&pnettable
->lock
);
458 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
459 if (tmp_pe
->type
== SMC_PNET_IB
&&
460 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
)) {
466 list_add_tail(&new_pe
->list
, &pnettable
->pnetlist
);
467 mutex_unlock(&pnettable
->lock
);
469 mutex_unlock(&pnettable
->lock
);
472 return (new_ibdev
) ? 0 : -EEXIST
;
475 /* Append a pnetid to the end of the pnet table if not already on this list.
477 static int smc_pnet_enter(struct net
*net
, struct nlattr
*tb
[])
479 char pnet_name
[SMC_MAX_PNETID_LEN
+ 1];
480 struct smc_pnettable
*pnettable
;
481 bool new_netdev
= false;
482 bool new_ibdev
= false;
488 /* get pnettable for namespace */
489 sn
= net_generic(net
, smc_net_id
);
490 pnettable
= &sn
->pnettable
;
493 if (!tb
[SMC_PNETID_NAME
])
495 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
496 if (!smc_pnetid_valid(string
, pnet_name
))
499 if (tb
[SMC_PNETID_ETHNAME
]) {
500 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
501 rc
= smc_pnet_add_eth(pnettable
, net
, string
, pnet_name
);
504 else if (rc
!= -EEXIST
)
508 /* if this is not the initial namespace, stop here */
509 if (net
!= &init_net
)
510 return new_netdev
? 0 : -EEXIST
;
513 if (tb
[SMC_PNETID_IBNAME
]) {
514 string
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
515 string
= strim(string
);
516 if (tb
[SMC_PNETID_IBPORT
]) {
517 ibport
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
518 if (ibport
< 1 || ibport
> SMC_MAX_PORTS
)
521 rc
= smc_pnet_add_ib(pnettable
, string
, ibport
, pnet_name
);
524 else if (rc
!= -EEXIST
)
527 return (new_netdev
|| new_ibdev
) ? 0 : -EEXIST
;
533 /* Convert an smc_pnetentry to a netlink attribute sequence */
534 static int smc_pnet_set_nla(struct sk_buff
*msg
,
535 struct smc_pnetentry
*pnetelem
)
537 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
))
539 if (pnetelem
->type
== SMC_PNET_ETH
) {
540 if (nla_put_string(msg
, SMC_PNETID_ETHNAME
,
544 if (nla_put_string(msg
, SMC_PNETID_ETHNAME
, "n/a"))
547 if (pnetelem
->type
== SMC_PNET_IB
) {
548 if (nla_put_string(msg
, SMC_PNETID_IBNAME
, pnetelem
->ib_name
) ||
549 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
552 if (nla_put_string(msg
, SMC_PNETID_IBNAME
, "n/a") ||
553 nla_put_u8(msg
, SMC_PNETID_IBPORT
, 0xff))
560 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
562 struct net
*net
= genl_info_net(info
);
564 return smc_pnet_enter(net
, info
->attrs
);
567 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
569 struct net
*net
= genl_info_net(info
);
571 if (!info
->attrs
[SMC_PNETID_NAME
])
573 return smc_pnet_remove_by_pnetid(net
,
574 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
577 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
583 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
584 u32 portid
, u32 seq
, u32 flags
,
585 struct smc_pnetentry
*pnetelem
)
589 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
590 flags
, SMC_PNETID_GET
);
593 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
594 genlmsg_cancel(skb
, hdr
);
597 genlmsg_end(skb
, hdr
);
601 static int _smc_pnet_dump(struct net
*net
, struct sk_buff
*skb
, u32 portid
,
602 u32 seq
, u8
*pnetid
, int start_idx
)
604 struct smc_pnettable
*pnettable
;
605 struct smc_pnetentry
*pnetelem
;
609 /* get pnettable for namespace */
610 sn
= net_generic(net
, smc_net_id
);
611 pnettable
= &sn
->pnettable
;
613 /* dump pnettable entries */
614 mutex_lock(&pnettable
->lock
);
615 list_for_each_entry(pnetelem
, &pnettable
->pnetlist
, list
) {
616 if (pnetid
&& !smc_pnet_match(pnetelem
->pnet_name
, pnetid
))
618 if (idx
++ < start_idx
)
620 /* if this is not the initial namespace, dump only netdev */
621 if (net
!= &init_net
&& pnetelem
->type
!= SMC_PNET_ETH
)
623 if (smc_pnet_dumpinfo(skb
, portid
, seq
, NLM_F_MULTI
,
629 mutex_unlock(&pnettable
->lock
);
633 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
635 struct net
*net
= sock_net(skb
->sk
);
638 idx
= _smc_pnet_dump(net
, skb
, NETLINK_CB(cb
->skb
).portid
,
639 cb
->nlh
->nlmsg_seq
, NULL
, cb
->args
[0]);
645 /* Retrieve one PNETID entry */
646 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
648 struct net
*net
= genl_info_net(info
);
652 if (!info
->attrs
[SMC_PNETID_NAME
])
655 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
659 _smc_pnet_dump(net
, msg
, info
->snd_portid
, info
->snd_seq
,
660 nla_data(info
->attrs
[SMC_PNETID_NAME
]), 0);
662 /* finish multi part message and send it */
663 hdr
= nlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
, NLMSG_DONE
, 0,
669 return genlmsg_reply(msg
, info
);
672 /* Remove and delete all pnetids from pnet table.
674 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
676 struct net
*net
= genl_info_net(info
);
678 smc_pnet_remove_by_pnetid(net
, NULL
);
682 /* SMC_PNETID generic netlink operation definition */
683 static const struct genl_ops smc_pnet_ops
[] = {
685 .cmd
= SMC_PNETID_GET
,
686 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
687 /* can be retrieved by unprivileged users */
688 .doit
= smc_pnet_get
,
689 .dumpit
= smc_pnet_dump
,
690 .start
= smc_pnet_dump_start
693 .cmd
= SMC_PNETID_ADD
,
694 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
695 .flags
= GENL_ADMIN_PERM
,
699 .cmd
= SMC_PNETID_DEL
,
700 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
701 .flags
= GENL_ADMIN_PERM
,
705 .cmd
= SMC_PNETID_FLUSH
,
706 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
707 .flags
= GENL_ADMIN_PERM
,
708 .doit
= smc_pnet_flush
712 /* SMC_PNETID family definition */
713 static struct genl_family smc_pnet_nl_family __ro_after_init
= {
715 .name
= SMCR_GENL_FAMILY_NAME
,
716 .version
= SMCR_GENL_FAMILY_VERSION
,
717 .maxattr
= SMC_PNETID_MAX
,
718 .policy
= smc_pnet_policy
,
720 .module
= THIS_MODULE
,
722 .n_ops
= ARRAY_SIZE(smc_pnet_ops
),
723 .resv_start_op
= SMC_PNETID_FLUSH
+ 1,
726 bool smc_pnet_is_ndev_pnetid(struct net
*net
, u8
*pnetid
)
728 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
729 struct smc_pnetids_ndev_entry
*pe
;
732 read_lock(&sn
->pnetids_ndev
.lock
);
733 list_for_each_entry(pe
, &sn
->pnetids_ndev
.list
, list
) {
734 if (smc_pnet_match(pnetid
, pe
->pnetid
)) {
741 read_unlock(&sn
->pnetids_ndev
.lock
);
745 static int smc_pnet_add_pnetid(struct net
*net
, u8
*pnetid
)
747 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
748 struct smc_pnetids_ndev_entry
*pe
, *pi
;
750 pe
= kzalloc(sizeof(*pe
), GFP_KERNEL
);
754 write_lock(&sn
->pnetids_ndev
.lock
);
755 list_for_each_entry(pi
, &sn
->pnetids_ndev
.list
, list
) {
756 if (smc_pnet_match(pnetid
, pi
->pnetid
)) {
757 refcount_inc(&pi
->refcnt
);
762 refcount_set(&pe
->refcnt
, 1);
763 memcpy(pe
->pnetid
, pnetid
, SMC_MAX_PNETID_LEN
);
764 list_add_tail(&pe
->list
, &sn
->pnetids_ndev
.list
);
767 write_unlock(&sn
->pnetids_ndev
.lock
);
771 static void smc_pnet_remove_pnetid(struct net
*net
, u8
*pnetid
)
773 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
774 struct smc_pnetids_ndev_entry
*pe
, *pe2
;
776 write_lock(&sn
->pnetids_ndev
.lock
);
777 list_for_each_entry_safe(pe
, pe2
, &sn
->pnetids_ndev
.list
, list
) {
778 if (smc_pnet_match(pnetid
, pe
->pnetid
)) {
779 if (refcount_dec_and_test(&pe
->refcnt
)) {
786 write_unlock(&sn
->pnetids_ndev
.lock
);
789 static void smc_pnet_add_base_pnetid(struct net
*net
, struct net_device
*dev
,
792 struct net_device
*base_dev
;
794 base_dev
= __pnet_find_base_ndev(dev
);
795 if (base_dev
->flags
& IFF_UP
&&
796 !smc_pnetid_by_dev_port(base_dev
->dev
.parent
, base_dev
->dev_port
,
798 /* add to PNETIDs list */
799 smc_pnet_add_pnetid(net
, ndev_pnetid
);
803 /* create initial list of netdevice pnetids */
804 static void smc_pnet_create_pnetids_list(struct net
*net
)
806 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
807 struct net_device
*dev
;
809 /* Newly created netns do not have devices.
810 * Do not even acquire rtnl.
812 if (list_empty(&net
->dev_base_head
))
815 /* Note: This might not be needed, because smc_pnet_netdev_event()
816 * is also calling smc_pnet_add_base_pnetid() when handling
820 for_each_netdev(net
, dev
)
821 smc_pnet_add_base_pnetid(net
, dev
, ndev_pnetid
);
825 /* clean up list of netdevice pnetids */
826 static void smc_pnet_destroy_pnetids_list(struct net
*net
)
828 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
829 struct smc_pnetids_ndev_entry
*pe
, *temp_pe
;
831 write_lock(&sn
->pnetids_ndev
.lock
);
832 list_for_each_entry_safe(pe
, temp_pe
, &sn
->pnetids_ndev
.list
, list
) {
836 write_unlock(&sn
->pnetids_ndev
.lock
);
839 static int smc_pnet_netdev_event(struct notifier_block
*this,
840 unsigned long event
, void *ptr
)
842 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
843 struct net
*net
= dev_net(event_dev
);
844 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
848 case NETDEV_UNREGISTER
:
849 smc_pnet_remove_by_ndev(event_dev
);
850 smc_ib_ndev_change(event_dev
, event
);
852 case NETDEV_REGISTER
:
853 smc_pnet_add_by_ndev(event_dev
);
854 smc_ib_ndev_change(event_dev
, event
);
857 smc_pnet_add_base_pnetid(net
, event_dev
, ndev_pnetid
);
860 event_dev
= __pnet_find_base_ndev(event_dev
);
861 if (!smc_pnetid_by_dev_port(event_dev
->dev
.parent
,
862 event_dev
->dev_port
, ndev_pnetid
)) {
863 /* remove from PNETIDs list */
864 smc_pnet_remove_pnetid(net
, ndev_pnetid
);
872 static struct notifier_block smc_netdev_notifier
= {
873 .notifier_call
= smc_pnet_netdev_event
876 /* init network namespace */
877 int smc_pnet_net_init(struct net
*net
)
879 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
880 struct smc_pnettable
*pnettable
= &sn
->pnettable
;
881 struct smc_pnetids_ndev
*pnetids_ndev
= &sn
->pnetids_ndev
;
883 INIT_LIST_HEAD(&pnettable
->pnetlist
);
884 mutex_init(&pnettable
->lock
);
885 INIT_LIST_HEAD(&pnetids_ndev
->list
);
886 rwlock_init(&pnetids_ndev
->lock
);
888 smc_pnet_create_pnetids_list(net
);
893 int __init
smc_pnet_init(void)
897 rc
= genl_register_family(&smc_pnet_nl_family
);
900 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
902 genl_unregister_family(&smc_pnet_nl_family
);
907 /* exit network namespace */
908 void smc_pnet_net_exit(struct net
*net
)
910 /* flush pnet table */
911 smc_pnet_remove_by_pnetid(net
, NULL
);
912 smc_pnet_destroy_pnetids_list(net
);
915 void smc_pnet_exit(void)
917 unregister_netdevice_notifier(&smc_netdev_notifier
);
918 genl_unregister_family(&smc_pnet_nl_family
);
921 static struct net_device
*__pnet_find_base_ndev(struct net_device
*ndev
)
926 nest_lvl
= ndev
->lower_level
;
927 for (i
= 0; i
< nest_lvl
; i
++) {
928 struct list_head
*lower
= &ndev
->adj_list
.lower
;
930 if (list_empty(lower
))
933 ndev
= netdev_lower_get_next(ndev
, &lower
);
938 /* Determine one base device for stacked net devices.
939 * If the lower device level contains more than one devices
940 * (for instance with bonding slaves), just the first device
941 * is used to reach a base device.
943 static struct net_device
*pnet_find_base_ndev(struct net_device
*ndev
)
946 ndev
= __pnet_find_base_ndev(ndev
);
951 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device
*ndev
,
954 struct smc_pnettable
*pnettable
;
955 struct net
*net
= dev_net(ndev
);
956 struct smc_pnetentry
*pnetelem
;
960 /* get pnettable for namespace */
961 sn
= net_generic(net
, smc_net_id
);
962 pnettable
= &sn
->pnettable
;
964 mutex_lock(&pnettable
->lock
);
965 list_for_each_entry(pnetelem
, &pnettable
->pnetlist
, list
) {
966 if (pnetelem
->type
== SMC_PNET_ETH
&& ndev
== pnetelem
->ndev
) {
967 /* get pnetid of netdev device */
968 memcpy(pnetid
, pnetelem
->pnet_name
, SMC_MAX_PNETID_LEN
);
973 mutex_unlock(&pnettable
->lock
);
977 static int smc_pnet_determine_gid(struct smc_ib_device
*ibdev
, int i
,
978 struct smc_init_info
*ini
)
980 if (!ini
->check_smcrv2
&&
981 !smc_ib_determine_gid(ibdev
, i
, ini
->vlan_id
, ini
->ib_gid
, NULL
,
987 if (ini
->check_smcrv2
&&
988 !smc_ib_determine_gid(ibdev
, i
, ini
->vlan_id
, ini
->smcrv2
.ib_gid_v2
,
989 NULL
, &ini
->smcrv2
)) {
990 ini
->smcrv2
.ib_dev_v2
= ibdev
;
991 ini
->smcrv2
.ib_port_v2
= i
;
997 /* find a roce device for the given pnetid */
998 static void _smc_pnet_find_roce_by_pnetid(u8
*pnet_id
,
999 struct smc_init_info
*ini
,
1000 struct smc_ib_device
*known_dev
,
1003 struct smc_ib_device
*ibdev
;
1006 mutex_lock(&smc_ib_devices
.mutex
);
1007 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
1008 if (ibdev
== known_dev
||
1009 !rdma_dev_access_netns(ibdev
->ibdev
, net
))
1011 for (i
= 1; i
<= SMC_MAX_PORTS
; i
++) {
1012 if (!rdma_is_port_valid(ibdev
->ibdev
, i
))
1014 if (smc_pnet_match(ibdev
->pnetid
[i
- 1], pnet_id
) &&
1015 smc_ib_port_active(ibdev
, i
) &&
1016 !test_bit(i
- 1, ibdev
->ports_going_away
)) {
1017 if (!smc_pnet_determine_gid(ibdev
, i
, ini
))
1023 mutex_unlock(&smc_ib_devices
.mutex
);
1026 /* find alternate roce device with same pnet_id, vlan_id and net namespace */
1027 void smc_pnet_find_alt_roce(struct smc_link_group
*lgr
,
1028 struct smc_init_info
*ini
,
1029 struct smc_ib_device
*known_dev
)
1031 struct net
*net
= lgr
->net
;
1033 _smc_pnet_find_roce_by_pnetid(lgr
->pnet_id
, ini
, known_dev
, net
);
1036 /* if handshake network device belongs to a roce device, return its
1037 * IB device and port
1039 static void smc_pnet_find_rdma_dev(struct net_device
*netdev
,
1040 struct smc_init_info
*ini
)
1042 struct net
*net
= dev_net(netdev
);
1043 struct smc_ib_device
*ibdev
;
1045 mutex_lock(&smc_ib_devices
.mutex
);
1046 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
1047 struct net_device
*ndev
;
1050 /* check rdma net namespace */
1051 if (!rdma_dev_access_netns(ibdev
->ibdev
, net
))
1054 for (i
= 1; i
<= SMC_MAX_PORTS
; i
++) {
1055 if (!rdma_is_port_valid(ibdev
->ibdev
, i
))
1057 ndev
= ib_device_get_netdev(ibdev
->ibdev
, i
);
1061 if (netdev
== ndev
&&
1062 smc_ib_port_active(ibdev
, i
) &&
1063 !test_bit(i
- 1, ibdev
->ports_going_away
)) {
1064 if (!smc_pnet_determine_gid(ibdev
, i
, ini
))
1069 mutex_unlock(&smc_ib_devices
.mutex
);
1072 /* Determine the corresponding IB device port based on the hardware PNETID.
1073 * Searching stops at the first matching active IB device port with vlan_id
1075 * If nothing found, check pnetid table.
1076 * If nothing found, try to use handshake device
1078 static void smc_pnet_find_roce_by_pnetid(struct net_device
*ndev
,
1079 struct smc_init_info
*ini
)
1081 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
1084 ndev
= pnet_find_base_ndev(ndev
);
1085 net
= dev_net(ndev
);
1086 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
1088 smc_pnet_find_ndev_pnetid_by_table(ndev
, ndev_pnetid
)) {
1089 smc_pnet_find_rdma_dev(ndev
, ini
);
1090 return; /* pnetid could not be determined */
1092 _smc_pnet_find_roce_by_pnetid(ndev_pnetid
, ini
, NULL
, net
);
1095 static void smc_pnet_find_ism_by_pnetid(struct net_device
*ndev
,
1096 struct smc_init_info
*ini
)
1098 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
1099 struct smcd_dev
*ismdev
;
1101 ndev
= pnet_find_base_ndev(ndev
);
1102 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
1104 smc_pnet_find_ndev_pnetid_by_table(ndev
, ndev_pnetid
))
1105 return; /* pnetid could not be determined */
1107 mutex_lock(&smcd_dev_list
.mutex
);
1108 list_for_each_entry(ismdev
, &smcd_dev_list
.list
, list
) {
1109 if (smc_pnet_match(ismdev
->pnetid
, ndev_pnetid
) &&
1110 !ismdev
->going_away
&&
1111 (!ini
->ism_peer_gid
[0].gid
||
1112 !smc_ism_cantalk(&ini
->ism_peer_gid
[0], ini
->vlan_id
,
1114 ini
->ism_dev
[0] = ismdev
;
1118 mutex_unlock(&smcd_dev_list
.mutex
);
1121 /* PNET table analysis for a given sock:
1122 * determine ib_device and port belonging to used internal TCP socket
1123 * ethernet interface.
1125 void smc_pnet_find_roce_resource(struct sock
*sk
, struct smc_init_info
*ini
)
1127 struct dst_entry
*dst
= sk_dst_get(sk
);
1134 smc_pnet_find_roce_by_pnetid(dst
->dev
, ini
);
1142 void smc_pnet_find_ism_resource(struct sock
*sk
, struct smc_init_info
*ini
)
1144 struct dst_entry
*dst
= sk_dst_get(sk
);
1146 ini
->ism_dev
[0] = NULL
;
1152 smc_pnet_find_ism_by_pnetid(dst
->dev
, ini
);
1160 /* Lookup and apply a pnet table entry to the given ib device.
1162 int smc_pnetid_by_table_ib(struct smc_ib_device
*smcibdev
, u8 ib_port
)
1164 char *ib_name
= smcibdev
->ibdev
->name
;
1165 struct smc_pnettable
*pnettable
;
1166 struct smc_pnetentry
*tmp_pe
;
1170 /* get pnettable for init namespace */
1171 sn
= net_generic(&init_net
, smc_net_id
);
1172 pnettable
= &sn
->pnettable
;
1174 mutex_lock(&pnettable
->lock
);
1175 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
1176 if (tmp_pe
->type
== SMC_PNET_IB
&&
1177 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
) &&
1178 tmp_pe
->ib_port
== ib_port
) {
1179 smc_pnet_apply_ib(smcibdev
, ib_port
, tmp_pe
->pnet_name
);
1184 mutex_unlock(&pnettable
->lock
);
1189 /* Lookup and apply a pnet table entry to the given smcd device.
1191 int smc_pnetid_by_table_smcd(struct smcd_dev
*smcddev
)
1193 const char *ib_name
= dev_name(smcddev
->ops
->get_dev(smcddev
));
1194 struct smc_pnettable
*pnettable
;
1195 struct smc_pnetentry
*tmp_pe
;
1199 /* get pnettable for init namespace */
1200 sn
= net_generic(&init_net
, smc_net_id
);
1201 pnettable
= &sn
->pnettable
;
1203 mutex_lock(&pnettable
->lock
);
1204 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
1205 if (tmp_pe
->type
== SMC_PNET_IB
&&
1206 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
)) {
1207 smc_pnet_apply_smcd(smcddev
, tmp_pe
->pnet_name
);
1212 mutex_unlock(&pnettable
->lock
);