// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Generic netlink support functions to configure an SMC-R PNET table
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
 */

12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
21 #include <rdma/ib_verbs.h>
27 static struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
29 .type
= NLA_NUL_STRING
,
30 .len
= SMC_MAX_PNETID_LEN
- 1
32 [SMC_PNETID_ETHNAME
] = {
33 .type
= NLA_NUL_STRING
,
36 [SMC_PNETID_IBNAME
] = {
37 .type
= NLA_NUL_STRING
,
38 .len
= IB_DEVICE_NAME_MAX
- 1
40 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
43 static struct genl_family smc_pnet_nl_family
;
46 * struct smc_pnettable - SMC PNET table anchor
47 * @lock: Lock for list action
48 * @pnetlist: List of PNETIDs
50 static struct smc_pnettable
{
52 struct list_head pnetlist
;
54 .pnetlist
= LIST_HEAD_INIT(smc_pnettable
.pnetlist
),
55 .lock
= __RW_LOCK_UNLOCKED(smc_pnettable
.lock
)
59 * struct smc_pnetentry - pnet identifier name entry
61 * @pnet_name: Pnet identifier name
62 * @ndev: pointer to network device.
63 * @smcibdev: Pointer to IB device.
65 struct smc_pnetentry
{
66 struct list_head list
;
67 char pnet_name
[SMC_MAX_PNETID_LEN
+ 1];
68 struct net_device
*ndev
;
69 struct smc_ib_device
*smcibdev
;
73 /* Check if two RDMA device entries are identical. Use device name and port
74 * number for comparison.
76 static bool smc_pnet_same_ibname(struct smc_pnetentry
*pnetelem
, char *ibname
,
79 return pnetelem
->ib_port
== ibport
&&
80 !strncmp(pnetelem
->smcibdev
->ibdev
->name
, ibname
,
81 sizeof(pnetelem
->smcibdev
->ibdev
->name
));
84 /* Find a pnetid in the pnet table.
86 static struct smc_pnetentry
*smc_pnet_find_pnetid(char *pnet_name
)
88 struct smc_pnetentry
*pnetelem
, *found_pnetelem
= NULL
;
90 read_lock(&smc_pnettable
.lock
);
91 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
92 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
93 sizeof(pnetelem
->pnet_name
))) {
94 found_pnetelem
= pnetelem
;
98 read_unlock(&smc_pnettable
.lock
);
99 return found_pnetelem
;
102 /* Remove a pnetid from the pnet table.
104 static int smc_pnet_remove_by_pnetid(char *pnet_name
)
106 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
109 write_lock(&smc_pnettable
.lock
);
110 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
112 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
113 sizeof(pnetelem
->pnet_name
))) {
114 list_del(&pnetelem
->list
);
115 dev_put(pnetelem
->ndev
);
121 write_unlock(&smc_pnettable
.lock
);
125 /* Remove a pnet entry mentioning a given network device from the pnet table.
127 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
129 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
132 write_lock(&smc_pnettable
.lock
);
133 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
135 if (pnetelem
->ndev
== ndev
) {
136 list_del(&pnetelem
->list
);
137 dev_put(pnetelem
->ndev
);
143 write_unlock(&smc_pnettable
.lock
);
147 /* Remove a pnet entry mentioning a given ib device from the pnet table.
149 int smc_pnet_remove_by_ibdev(struct smc_ib_device
*ibdev
)
151 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
154 write_lock(&smc_pnettable
.lock
);
155 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
157 if (pnetelem
->smcibdev
== ibdev
) {
158 list_del(&pnetelem
->list
);
159 dev_put(pnetelem
->ndev
);
165 write_unlock(&smc_pnettable
.lock
);
169 /* Append a pnetid to the end of the pnet table if not already on this list.
171 static int smc_pnet_enter(struct smc_pnetentry
*new_pnetelem
)
173 struct smc_pnetentry
*pnetelem
;
176 write_lock(&smc_pnettable
.lock
);
177 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
178 if (!strncmp(pnetelem
->pnet_name
, new_pnetelem
->pnet_name
,
179 sizeof(new_pnetelem
->pnet_name
)) ||
180 !strncmp(pnetelem
->ndev
->name
, new_pnetelem
->ndev
->name
,
181 sizeof(new_pnetelem
->ndev
->name
)) ||
182 smc_pnet_same_ibname(pnetelem
,
183 new_pnetelem
->smcibdev
->ibdev
->name
,
184 new_pnetelem
->ib_port
)) {
185 dev_put(pnetelem
->ndev
);
189 list_add_tail(&new_pnetelem
->list
, &smc_pnettable
.pnetlist
);
192 write_unlock(&smc_pnettable
.lock
);
196 /* The limit for pnetid is 16 characters.
197 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
198 * Lower case letters are converted to upper case.
199 * Interior blanks should not be used.
201 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
203 char *bf
= skip_spaces(pnet_name
);
204 size_t len
= strlen(bf
);
205 char *end
= bf
+ len
;
209 while (--end
>= bf
&& isspace(*end
))
211 if (end
- bf
>= SMC_MAX_PNETID_LEN
)
216 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
223 /* Find an infiniband device by a given name. The device might not exist. */
224 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
226 struct smc_ib_device
*ibdev
;
228 spin_lock(&smc_ib_devices
.lock
);
229 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
230 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
231 sizeof(ibdev
->ibdev
->name
))) {
237 spin_unlock(&smc_ib_devices
.lock
);
241 /* Parse the supplied netlink attributes and fill a pnetentry structure.
242 * For ethernet and infiniband device names verify that the devices exist.
244 static int smc_pnet_fill_entry(struct net
*net
, struct smc_pnetentry
*pnetelem
,
247 char *string
, *ibname
;
250 memset(pnetelem
, 0, sizeof(*pnetelem
));
251 INIT_LIST_HEAD(&pnetelem
->list
);
254 if (!tb
[SMC_PNETID_NAME
])
256 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
257 if (!smc_pnetid_valid(string
, pnetelem
->pnet_name
))
261 if (!tb
[SMC_PNETID_ETHNAME
])
264 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
265 pnetelem
->ndev
= dev_get_by_name(net
, string
);
270 if (!tb
[SMC_PNETID_IBNAME
])
273 ibname
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
274 ibname
= strim(ibname
);
275 pnetelem
->smcibdev
= smc_pnet_find_ib(ibname
);
276 if (!pnetelem
->smcibdev
)
280 if (!tb
[SMC_PNETID_IBPORT
])
282 pnetelem
->ib_port
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
283 if (pnetelem
->ib_port
< 1 || pnetelem
->ib_port
> SMC_MAX_PORTS
)
290 dev_put(pnetelem
->ndev
);
294 /* Convert an smc_pnetentry to a netlink attribute sequence */
295 static int smc_pnet_set_nla(struct sk_buff
*msg
, struct smc_pnetentry
*pnetelem
)
297 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
) ||
298 nla_put_string(msg
, SMC_PNETID_ETHNAME
, pnetelem
->ndev
->name
) ||
299 nla_put_string(msg
, SMC_PNETID_IBNAME
,
300 pnetelem
->smcibdev
->ibdev
->name
) ||
301 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
306 /* Retrieve one PNETID entry */
307 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
309 struct smc_pnetentry
*pnetelem
;
314 if (!info
->attrs
[SMC_PNETID_NAME
])
316 pnetelem
= smc_pnet_find_pnetid(
317 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
320 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
324 hdr
= genlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
,
325 &smc_pnet_nl_family
, 0, SMC_PNETID_GET
);
331 if (smc_pnet_set_nla(msg
, pnetelem
)) {
336 genlmsg_end(msg
, hdr
);
337 return genlmsg_reply(msg
, info
);
344 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
346 struct net
*net
= genl_info_net(info
);
347 struct smc_pnetentry
*pnetelem
;
350 pnetelem
= kzalloc(sizeof(*pnetelem
), GFP_KERNEL
);
353 rc
= smc_pnet_fill_entry(net
, pnetelem
, info
->attrs
);
355 rc
= smc_pnet_enter(pnetelem
);
363 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
365 if (!info
->attrs
[SMC_PNETID_NAME
])
367 return smc_pnet_remove_by_pnetid(
368 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
371 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
377 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
378 u32 portid
, u32 seq
, u32 flags
,
379 struct smc_pnetentry
*pnetelem
)
383 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
384 flags
, SMC_PNETID_GET
);
387 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
388 genlmsg_cancel(skb
, hdr
);
391 genlmsg_end(skb
, hdr
);
395 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
397 struct smc_pnetentry
*pnetelem
;
400 read_lock(&smc_pnettable
.lock
);
401 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
402 if (idx
++ < cb
->args
[0])
404 if (smc_pnet_dumpinfo(skb
, NETLINK_CB(cb
->skb
).portid
,
405 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
412 read_unlock(&smc_pnettable
.lock
);
416 /* Remove and delete all pnetids from pnet table.
418 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
420 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
422 write_lock(&smc_pnettable
.lock
);
423 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
425 list_del(&pnetelem
->list
);
426 dev_put(pnetelem
->ndev
);
429 write_unlock(&smc_pnettable
.lock
);
433 /* SMC_PNETID generic netlink operation definition */
434 static const struct genl_ops smc_pnet_ops
[] = {
436 .cmd
= SMC_PNETID_GET
,
437 .flags
= GENL_ADMIN_PERM
,
438 .policy
= smc_pnet_policy
,
439 .doit
= smc_pnet_get
,
440 .dumpit
= smc_pnet_dump
,
441 .start
= smc_pnet_dump_start
444 .cmd
= SMC_PNETID_ADD
,
445 .flags
= GENL_ADMIN_PERM
,
446 .policy
= smc_pnet_policy
,
450 .cmd
= SMC_PNETID_DEL
,
451 .flags
= GENL_ADMIN_PERM
,
452 .policy
= smc_pnet_policy
,
456 .cmd
= SMC_PNETID_FLUSH
,
457 .flags
= GENL_ADMIN_PERM
,
458 .policy
= smc_pnet_policy
,
459 .doit
= smc_pnet_flush
463 /* SMC_PNETID family definition */
464 static struct genl_family smc_pnet_nl_family __ro_after_init
= {
466 .name
= SMCR_GENL_FAMILY_NAME
,
467 .version
= SMCR_GENL_FAMILY_VERSION
,
468 .maxattr
= SMC_PNETID_MAX
,
470 .module
= THIS_MODULE
,
472 .n_ops
= ARRAY_SIZE(smc_pnet_ops
)
475 static int smc_pnet_netdev_event(struct notifier_block
*this,
476 unsigned long event
, void *ptr
)
478 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
482 case NETDEV_UNREGISTER
:
483 smc_pnet_remove_by_ndev(event_dev
);
490 static struct notifier_block smc_netdev_notifier
= {
491 .notifier_call
= smc_pnet_netdev_event
494 int __init
smc_pnet_init(void)
498 rc
= genl_register_family(&smc_pnet_nl_family
);
501 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
503 genl_unregister_family(&smc_pnet_nl_family
);
507 void smc_pnet_exit(void)
509 smc_pnet_flush(NULL
, NULL
);
510 unregister_netdevice_notifier(&smc_netdev_notifier
);
511 genl_unregister_family(&smc_pnet_nl_family
);
514 /* Determine one base device for stacked net devices.
515 * If the lower device level contains more than one devices
516 * (for instance with bonding slaves), just the first device
517 * is used to reach a base device.
519 static struct net_device
*pnet_find_base_ndev(struct net_device
*ndev
)
524 nest_lvl
= dev_get_nest_level(ndev
);
525 for (i
= 0; i
< nest_lvl
; i
++) {
526 struct list_head
*lower
= &ndev
->adj_list
.lower
;
528 if (list_empty(lower
))
531 ndev
= netdev_lower_get_next(ndev
, &lower
);
537 /* Determine the corresponding IB device port based on the hardware PNETID.
538 * Searching stops at the first matching active IB device port with vlan_id
541 static void smc_pnet_find_roce_by_pnetid(struct net_device
*ndev
,
542 struct smc_ib_device
**smcibdev
,
543 u8
*ibport
, unsigned short vlan_id
,
546 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
547 struct smc_ib_device
*ibdev
;
550 ndev
= pnet_find_base_ndev(ndev
);
551 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
553 return; /* pnetid could not be determined */
555 spin_lock(&smc_ib_devices
.lock
);
556 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
557 for (i
= 1; i
<= SMC_MAX_PORTS
; i
++) {
558 if (!rdma_is_port_valid(ibdev
->ibdev
, i
))
560 if (!memcmp(ibdev
->pnetid
[i
- 1], ndev_pnetid
,
561 SMC_MAX_PNETID_LEN
) &&
562 smc_ib_port_active(ibdev
, i
) &&
563 !smc_ib_determine_gid(ibdev
, i
, vlan_id
, gid
,
572 spin_unlock(&smc_ib_devices
.lock
);
575 static void smc_pnet_find_ism_by_pnetid(struct net_device
*ndev
,
576 struct smcd_dev
**smcismdev
)
578 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
579 struct smcd_dev
*ismdev
;
581 ndev
= pnet_find_base_ndev(ndev
);
582 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
584 return; /* pnetid could not be determined */
586 spin_lock(&smcd_dev_list
.lock
);
587 list_for_each_entry(ismdev
, &smcd_dev_list
.list
, list
) {
588 if (!memcmp(ismdev
->pnetid
, ndev_pnetid
, SMC_MAX_PNETID_LEN
)) {
593 spin_unlock(&smcd_dev_list
.lock
);
596 /* Lookup of coupled ib_device via SMC pnet table */
597 static void smc_pnet_find_roce_by_table(struct net_device
*netdev
,
598 struct smc_ib_device
**smcibdev
,
599 u8
*ibport
, unsigned short vlan_id
,
602 struct smc_pnetentry
*pnetelem
;
604 read_lock(&smc_pnettable
.lock
);
605 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
606 if (netdev
== pnetelem
->ndev
) {
607 if (smc_ib_port_active(pnetelem
->smcibdev
,
608 pnetelem
->ib_port
) &&
609 !smc_ib_determine_gid(pnetelem
->smcibdev
,
610 pnetelem
->ib_port
, vlan_id
,
612 *smcibdev
= pnetelem
->smcibdev
;
613 *ibport
= pnetelem
->ib_port
;
618 read_unlock(&smc_pnettable
.lock
);
621 /* PNET table analysis for a given sock:
622 * determine ib_device and port belonging to used internal TCP socket
623 * ethernet interface.
625 void smc_pnet_find_roce_resource(struct sock
*sk
,
626 struct smc_ib_device
**smcibdev
, u8
*ibport
,
627 unsigned short vlan_id
, u8 gid
[])
629 struct dst_entry
*dst
= sk_dst_get(sk
);
639 /* if possible, lookup via hardware-defined pnetid */
640 smc_pnet_find_roce_by_pnetid(dst
->dev
, smcibdev
, ibport
, vlan_id
, gid
);
644 /* lookup via SMC PNET table */
645 smc_pnet_find_roce_by_table(dst
->dev
, smcibdev
, ibport
, vlan_id
, gid
);
653 void smc_pnet_find_ism_resource(struct sock
*sk
, struct smcd_dev
**smcismdev
)
655 struct dst_entry
*dst
= sk_dst_get(sk
);
663 /* if possible, lookup via hardware-defined pnetid */
664 smc_pnet_find_ism_by_pnetid(dst
->dev
, smcismdev
);