// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Generic netlink support functions to configure an SMC-R PNET table
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
 */
#include <linux/module.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <net/netlink.h>
#include <net/genetlink.h>

#include <uapi/linux/if.h>
#include <uapi/linux/smc.h>

#include <rdma/ib_verbs.h>
#define SMC_MAX_PNET_ID_LEN	16	/* Max. length of PNET id */
28 static struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
30 .type
= NLA_NUL_STRING
,
31 .len
= SMC_MAX_PNET_ID_LEN
- 1
33 [SMC_PNETID_ETHNAME
] = {
34 .type
= NLA_NUL_STRING
,
37 [SMC_PNETID_IBNAME
] = {
38 .type
= NLA_NUL_STRING
,
39 .len
= IB_DEVICE_NAME_MAX
- 1
41 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
44 static struct genl_family smc_pnet_nl_family
;
47 * struct smc_pnettable - SMC PNET table anchor
48 * @lock: Lock for list action
49 * @pnetlist: List of PNETIDs
51 static struct smc_pnettable
{
53 struct list_head pnetlist
;
55 .pnetlist
= LIST_HEAD_INIT(smc_pnettable
.pnetlist
),
56 .lock
= __RW_LOCK_UNLOCKED(smc_pnettable
.lock
)
60 * struct smc_pnetentry - pnet identifier name entry
62 * @pnet_name: Pnet identifier name
63 * @ndev: pointer to network device.
64 * @smcibdev: Pointer to IB device.
66 struct smc_pnetentry
{
67 struct list_head list
;
68 char pnet_name
[SMC_MAX_PNET_ID_LEN
+ 1];
69 struct net_device
*ndev
;
70 struct smc_ib_device
*smcibdev
;
74 /* Check if two RDMA device entries are identical. Use device name and port
75 * number for comparison.
77 static bool smc_pnet_same_ibname(struct smc_pnetentry
*pnetelem
, char *ibname
,
80 return pnetelem
->ib_port
== ibport
&&
81 !strncmp(pnetelem
->smcibdev
->ibdev
->name
, ibname
,
82 sizeof(pnetelem
->smcibdev
->ibdev
->name
));
85 /* Find a pnetid in the pnet table.
87 static struct smc_pnetentry
*smc_pnet_find_pnetid(char *pnet_name
)
89 struct smc_pnetentry
*pnetelem
, *found_pnetelem
= NULL
;
91 read_lock(&smc_pnettable
.lock
);
92 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
93 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
94 sizeof(pnetelem
->pnet_name
))) {
95 found_pnetelem
= pnetelem
;
99 read_unlock(&smc_pnettable
.lock
);
100 return found_pnetelem
;
103 /* Remove a pnetid from the pnet table.
105 static int smc_pnet_remove_by_pnetid(char *pnet_name
)
107 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
110 write_lock(&smc_pnettable
.lock
);
111 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
113 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
114 sizeof(pnetelem
->pnet_name
))) {
115 list_del(&pnetelem
->list
);
116 dev_put(pnetelem
->ndev
);
122 write_unlock(&smc_pnettable
.lock
);
126 /* Remove a pnet entry mentioning a given network device from the pnet table.
128 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
130 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
133 write_lock(&smc_pnettable
.lock
);
134 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
136 if (pnetelem
->ndev
== ndev
) {
137 list_del(&pnetelem
->list
);
138 dev_put(pnetelem
->ndev
);
144 write_unlock(&smc_pnettable
.lock
);
148 /* Remove a pnet entry mentioning a given ib device from the pnet table.
150 int smc_pnet_remove_by_ibdev(struct smc_ib_device
*ibdev
)
152 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
155 write_lock(&smc_pnettable
.lock
);
156 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
158 if (pnetelem
->smcibdev
== ibdev
) {
159 list_del(&pnetelem
->list
);
160 dev_put(pnetelem
->ndev
);
166 write_unlock(&smc_pnettable
.lock
);
170 /* Append a pnetid to the end of the pnet table if not already on this list.
172 static int smc_pnet_enter(struct smc_pnetentry
*new_pnetelem
)
174 struct smc_pnetentry
*pnetelem
;
177 write_lock(&smc_pnettable
.lock
);
178 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
179 if (!strncmp(pnetelem
->pnet_name
, new_pnetelem
->pnet_name
,
180 sizeof(new_pnetelem
->pnet_name
)) ||
181 !strncmp(pnetelem
->ndev
->name
, new_pnetelem
->ndev
->name
,
182 sizeof(new_pnetelem
->ndev
->name
)) ||
183 smc_pnet_same_ibname(pnetelem
,
184 new_pnetelem
->smcibdev
->ibdev
->name
,
185 new_pnetelem
->ib_port
)) {
186 dev_put(pnetelem
->ndev
);
190 list_add_tail(&new_pnetelem
->list
, &smc_pnettable
.pnetlist
);
193 write_unlock(&smc_pnettable
.lock
);
197 /* The limit for pnetid is 16 characters.
198 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
199 * Lower case letters are converted to upper case.
200 * Interior blanks should not be used.
202 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
204 char *bf
= skip_spaces(pnet_name
);
205 size_t len
= strlen(bf
);
206 char *end
= bf
+ len
;
210 while (--end
>= bf
&& isspace(*end
))
212 if (end
- bf
>= SMC_MAX_PNET_ID_LEN
)
217 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
224 /* Find an infiniband device by a given name. The device might not exist. */
225 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
227 struct smc_ib_device
*ibdev
;
229 spin_lock(&smc_ib_devices
.lock
);
230 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
231 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
232 sizeof(ibdev
->ibdev
->name
))) {
238 spin_unlock(&smc_ib_devices
.lock
);
242 /* Parse the supplied netlink attributes and fill a pnetentry structure.
243 * For ethernet and infiniband device names verify that the devices exist.
245 static int smc_pnet_fill_entry(struct net
*net
, struct smc_pnetentry
*pnetelem
,
248 char *string
, *ibname
= NULL
;
251 memset(pnetelem
, 0, sizeof(*pnetelem
));
252 INIT_LIST_HEAD(&pnetelem
->list
);
253 if (tb
[SMC_PNETID_NAME
]) {
254 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
255 if (!smc_pnetid_valid(string
, pnetelem
->pnet_name
)) {
260 if (tb
[SMC_PNETID_ETHNAME
]) {
261 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
262 pnetelem
->ndev
= dev_get_by_name(net
, string
);
266 if (tb
[SMC_PNETID_IBNAME
]) {
267 ibname
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
268 ibname
= strim(ibname
);
269 pnetelem
->smcibdev
= smc_pnet_find_ib(ibname
);
270 if (!pnetelem
->smcibdev
) {
275 if (tb
[SMC_PNETID_IBPORT
]) {
276 pnetelem
->ib_port
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
277 if (pnetelem
->ib_port
> SMC_MAX_PORTS
) {
286 dev_put(pnetelem
->ndev
);
290 /* Convert an smc_pnetentry to a netlink attribute sequence */
291 static int smc_pnet_set_nla(struct sk_buff
*msg
, struct smc_pnetentry
*pnetelem
)
293 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
) ||
294 nla_put_string(msg
, SMC_PNETID_ETHNAME
, pnetelem
->ndev
->name
) ||
295 nla_put_string(msg
, SMC_PNETID_IBNAME
,
296 pnetelem
->smcibdev
->ibdev
->name
) ||
297 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
302 /* Retrieve one PNETID entry */
303 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
305 struct smc_pnetentry
*pnetelem
;
310 pnetelem
= smc_pnet_find_pnetid(
311 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
314 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
318 hdr
= genlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
,
319 &smc_pnet_nl_family
, 0, SMC_PNETID_GET
);
325 if (smc_pnet_set_nla(msg
, pnetelem
)) {
330 genlmsg_end(msg
, hdr
);
331 return genlmsg_reply(msg
, info
);
338 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
340 struct net
*net
= genl_info_net(info
);
341 struct smc_pnetentry
*pnetelem
;
344 pnetelem
= kzalloc(sizeof(*pnetelem
), GFP_KERNEL
);
347 rc
= smc_pnet_fill_entry(net
, pnetelem
, info
->attrs
);
349 rc
= smc_pnet_enter(pnetelem
);
354 rc
= smc_ib_remember_port_attr(pnetelem
->smcibdev
, pnetelem
->ib_port
);
356 smc_pnet_remove_by_pnetid(pnetelem
->pnet_name
);
360 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
362 return smc_pnet_remove_by_pnetid(
363 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
366 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
372 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
373 u32 portid
, u32 seq
, u32 flags
,
374 struct smc_pnetentry
*pnetelem
)
378 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
379 flags
, SMC_PNETID_GET
);
382 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
383 genlmsg_cancel(skb
, hdr
);
386 genlmsg_end(skb
, hdr
);
390 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
392 struct smc_pnetentry
*pnetelem
;
395 read_lock(&smc_pnettable
.lock
);
396 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
397 if (idx
++ < cb
->args
[0])
399 if (smc_pnet_dumpinfo(skb
, NETLINK_CB(cb
->skb
).portid
,
400 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
407 read_unlock(&smc_pnettable
.lock
);
411 /* Remove and delete all pnetids from pnet table.
413 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
415 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
417 write_lock(&smc_pnettable
.lock
);
418 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
420 list_del(&pnetelem
->list
);
421 dev_put(pnetelem
->ndev
);
424 write_unlock(&smc_pnettable
.lock
);
428 /* SMC_PNETID generic netlink operation definition */
429 static const struct genl_ops smc_pnet_ops
[] = {
431 .cmd
= SMC_PNETID_GET
,
432 .flags
= GENL_ADMIN_PERM
,
433 .policy
= smc_pnet_policy
,
434 .doit
= smc_pnet_get
,
435 .dumpit
= smc_pnet_dump
,
436 .start
= smc_pnet_dump_start
439 .cmd
= SMC_PNETID_ADD
,
440 .flags
= GENL_ADMIN_PERM
,
441 .policy
= smc_pnet_policy
,
445 .cmd
= SMC_PNETID_DEL
,
446 .flags
= GENL_ADMIN_PERM
,
447 .policy
= smc_pnet_policy
,
451 .cmd
= SMC_PNETID_FLUSH
,
452 .flags
= GENL_ADMIN_PERM
,
453 .policy
= smc_pnet_policy
,
454 .doit
= smc_pnet_flush
458 /* SMC_PNETID family definition */
459 static struct genl_family smc_pnet_nl_family
= {
461 .name
= SMCR_GENL_FAMILY_NAME
,
462 .version
= SMCR_GENL_FAMILY_VERSION
,
463 .maxattr
= SMC_PNETID_MAX
,
465 .module
= THIS_MODULE
,
467 .n_ops
= ARRAY_SIZE(smc_pnet_ops
)
470 static int smc_pnet_netdev_event(struct notifier_block
*this,
471 unsigned long event
, void *ptr
)
473 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
477 case NETDEV_UNREGISTER
:
478 smc_pnet_remove_by_ndev(event_dev
);
485 static struct notifier_block smc_netdev_notifier
= {
486 .notifier_call
= smc_pnet_netdev_event
489 int __init
smc_pnet_init(void)
493 rc
= genl_register_family(&smc_pnet_nl_family
);
496 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
498 genl_unregister_family(&smc_pnet_nl_family
);
502 void smc_pnet_exit(void)
504 smc_pnet_flush(NULL
, NULL
);
505 unregister_netdevice_notifier(&smc_netdev_notifier
);
506 genl_unregister_family(&smc_pnet_nl_family
);
509 /* PNET table analysis for a given sock:
510 * determine ib_device and port belonging to used internal TCP socket
511 * ethernet interface.
513 void smc_pnet_find_roce_resource(struct sock
*sk
,
514 struct smc_ib_device
**smcibdev
, u8
*ibport
)
516 struct dst_entry
*dst
= sk_dst_get(sk
);
517 struct smc_pnetentry
*pnetelem
;
526 read_lock(&smc_pnettable
.lock
);
527 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
528 if (dst
->dev
== pnetelem
->ndev
) {
529 if (smc_ib_port_active(pnetelem
->smcibdev
,
530 pnetelem
->ib_port
)) {
531 *smcibdev
= pnetelem
->smcibdev
;
532 *ibport
= pnetelem
->ib_port
;
537 read_unlock(&smc_pnettable
.lock
);