/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Generic netlink support functions to configure an SMC-R PNET table
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
 */
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <net/netlink.h>
#include <net/genetlink.h>

#include <uapi/linux/if.h>
#include <uapi/linux/smc.h>

#include <rdma/ib_verbs.h>
25 #define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */
27 static struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
29 .type
= NLA_NUL_STRING
,
30 .len
= SMC_MAX_PNET_ID_LEN
- 1
32 [SMC_PNETID_ETHNAME
] = {
33 .type
= NLA_NUL_STRING
,
36 [SMC_PNETID_IBNAME
] = {
37 .type
= NLA_NUL_STRING
,
38 .len
= IB_DEVICE_NAME_MAX
- 1
40 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
43 static struct genl_family smc_pnet_nl_family
;
46 * struct smc_pnettable - SMC PNET table anchor
47 * @lock: Lock for list action
48 * @pnetlist: List of PNETIDs
50 static struct smc_pnettable
{
52 struct list_head pnetlist
;
54 .pnetlist
= LIST_HEAD_INIT(smc_pnettable
.pnetlist
),
55 .lock
= __RW_LOCK_UNLOCKED(smc_pnettable
.lock
)
59 * struct smc_pnetentry - pnet identifier name entry
61 * @pnet_name: Pnet identifier name
62 * @ndev: pointer to network device.
63 * @smcibdev: Pointer to IB device.
65 struct smc_pnetentry
{
66 struct list_head list
;
67 char pnet_name
[SMC_MAX_PNET_ID_LEN
+ 1];
68 struct net_device
*ndev
;
69 struct smc_ib_device
*smcibdev
;
73 /* Check if two RDMA device entries are identical. Use device name and port
74 * number for comparison.
76 static bool smc_pnet_same_ibname(struct smc_pnetentry
*pnetelem
, char *ibname
,
79 return pnetelem
->ib_port
== ibport
&&
80 !strncmp(pnetelem
->smcibdev
->ibdev
->name
, ibname
,
81 sizeof(pnetelem
->smcibdev
->ibdev
->name
));
84 /* Find a pnetid in the pnet table.
86 static struct smc_pnetentry
*smc_pnet_find_pnetid(char *pnet_name
)
88 struct smc_pnetentry
*pnetelem
, *found_pnetelem
= NULL
;
90 read_lock(&smc_pnettable
.lock
);
91 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
92 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
93 sizeof(pnetelem
->pnet_name
))) {
94 found_pnetelem
= pnetelem
;
98 read_unlock(&smc_pnettable
.lock
);
99 return found_pnetelem
;
102 /* Remove a pnetid from the pnet table.
104 static int smc_pnet_remove_by_pnetid(char *pnet_name
)
106 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
109 write_lock(&smc_pnettable
.lock
);
110 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
112 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
113 sizeof(pnetelem
->pnet_name
))) {
114 list_del(&pnetelem
->list
);
115 dev_put(pnetelem
->ndev
);
121 write_unlock(&smc_pnettable
.lock
);
125 /* Remove a pnet entry mentioning a given network device from the pnet table.
127 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
129 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
132 write_lock(&smc_pnettable
.lock
);
133 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
135 if (pnetelem
->ndev
== ndev
) {
136 list_del(&pnetelem
->list
);
137 dev_put(pnetelem
->ndev
);
143 write_unlock(&smc_pnettable
.lock
);
147 /* Remove a pnet entry mentioning a given ib device from the pnet table.
149 int smc_pnet_remove_by_ibdev(struct smc_ib_device
*ibdev
)
151 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
154 write_lock(&smc_pnettable
.lock
);
155 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
157 if (pnetelem
->smcibdev
== ibdev
) {
158 list_del(&pnetelem
->list
);
159 dev_put(pnetelem
->ndev
);
165 write_unlock(&smc_pnettable
.lock
);
169 /* Append a pnetid to the end of the pnet table if not already on this list.
171 static int smc_pnet_enter(struct smc_pnetentry
*new_pnetelem
)
173 struct smc_pnetentry
*pnetelem
;
176 write_lock(&smc_pnettable
.lock
);
177 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
178 if (!strncmp(pnetelem
->pnet_name
, new_pnetelem
->pnet_name
,
179 sizeof(new_pnetelem
->pnet_name
)) ||
180 !strncmp(pnetelem
->ndev
->name
, new_pnetelem
->ndev
->name
,
181 sizeof(new_pnetelem
->ndev
->name
)) ||
182 smc_pnet_same_ibname(pnetelem
,
183 new_pnetelem
->smcibdev
->ibdev
->name
,
184 new_pnetelem
->ib_port
))
187 list_add_tail(&new_pnetelem
->list
, &smc_pnettable
.pnetlist
);
190 write_unlock(&smc_pnettable
.lock
);
194 /* The limit for pnetid is 16 characters.
195 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
196 * Lower case letters are converted to upper case.
197 * Interior blanks should not be used.
199 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
201 char *bf
= skip_spaces(pnet_name
);
202 size_t len
= strlen(bf
);
203 char *end
= bf
+ len
;
207 while (--end
>= bf
&& isspace(*end
))
209 if (end
- bf
>= SMC_MAX_PNET_ID_LEN
)
214 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
221 /* Find an infiniband device by a given name. The device might not exist. */
222 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
224 struct smc_ib_device
*ibdev
;
226 spin_lock(&smc_ib_devices
.lock
);
227 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
228 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
229 sizeof(ibdev
->ibdev
->name
))) {
235 spin_unlock(&smc_ib_devices
.lock
);
239 /* Parse the supplied netlink attributes and fill a pnetentry structure.
240 * For ethernet and infiniband device names verify that the devices exist.
242 static int smc_pnet_fill_entry(struct net
*net
, struct smc_pnetentry
*pnetelem
,
245 char *string
, *ibname
= NULL
;
248 memset(pnetelem
, 0, sizeof(*pnetelem
));
249 INIT_LIST_HEAD(&pnetelem
->list
);
250 if (tb
[SMC_PNETID_NAME
]) {
251 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
252 if (!smc_pnetid_valid(string
, pnetelem
->pnet_name
)) {
257 if (tb
[SMC_PNETID_ETHNAME
]) {
258 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
259 pnetelem
->ndev
= dev_get_by_name(net
, string
);
263 if (tb
[SMC_PNETID_IBNAME
]) {
264 ibname
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
265 ibname
= strim(ibname
);
266 pnetelem
->smcibdev
= smc_pnet_find_ib(ibname
);
267 if (!pnetelem
->smcibdev
) {
272 if (tb
[SMC_PNETID_IBPORT
]) {
273 pnetelem
->ib_port
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
274 if (pnetelem
->ib_port
> SMC_MAX_PORTS
) {
283 dev_put(pnetelem
->ndev
);
287 /* Convert an smc_pnetentry to a netlink attribute sequence */
288 static int smc_pnet_set_nla(struct sk_buff
*msg
, struct smc_pnetentry
*pnetelem
)
290 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
) ||
291 nla_put_string(msg
, SMC_PNETID_ETHNAME
, pnetelem
->ndev
->name
) ||
292 nla_put_string(msg
, SMC_PNETID_IBNAME
,
293 pnetelem
->smcibdev
->ibdev
->name
) ||
294 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
299 /* Retrieve one PNETID entry */
300 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
302 struct smc_pnetentry
*pnetelem
;
307 pnetelem
= smc_pnet_find_pnetid(
308 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
311 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
315 hdr
= genlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
,
316 &smc_pnet_nl_family
, 0, SMC_PNETID_GET
);
322 if (smc_pnet_set_nla(msg
, pnetelem
)) {
327 genlmsg_end(msg
, hdr
);
328 return genlmsg_reply(msg
, info
);
335 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
337 struct net
*net
= genl_info_net(info
);
338 struct smc_pnetentry
*pnetelem
;
341 pnetelem
= kzalloc(sizeof(*pnetelem
), GFP_KERNEL
);
344 rc
= smc_pnet_fill_entry(net
, pnetelem
, info
->attrs
);
346 rc
= smc_pnet_enter(pnetelem
);
351 rc
= smc_ib_remember_port_attr(pnetelem
->smcibdev
, pnetelem
->ib_port
);
353 smc_pnet_remove_by_pnetid(pnetelem
->pnet_name
);
357 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
359 return smc_pnet_remove_by_pnetid(
360 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
363 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
369 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
370 u32 portid
, u32 seq
, u32 flags
,
371 struct smc_pnetentry
*pnetelem
)
375 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
376 flags
, SMC_PNETID_GET
);
379 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
380 genlmsg_cancel(skb
, hdr
);
383 genlmsg_end(skb
, hdr
);
387 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
389 struct smc_pnetentry
*pnetelem
;
392 read_lock(&smc_pnettable
.lock
);
393 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
394 if (idx
++ < cb
->args
[0])
396 if (smc_pnet_dumpinfo(skb
, NETLINK_CB(cb
->skb
).portid
,
397 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
404 read_unlock(&smc_pnettable
.lock
);
408 /* Remove and delete all pnetids from pnet table.
410 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
412 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
414 write_lock(&smc_pnettable
.lock
);
415 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
417 list_del(&pnetelem
->list
);
418 dev_put(pnetelem
->ndev
);
421 write_unlock(&smc_pnettable
.lock
);
425 /* SMC_PNETID generic netlink operation definition */
426 static const struct genl_ops smc_pnet_ops
[] = {
428 .cmd
= SMC_PNETID_GET
,
429 .flags
= GENL_ADMIN_PERM
,
430 .policy
= smc_pnet_policy
,
431 .doit
= smc_pnet_get
,
432 .dumpit
= smc_pnet_dump
,
433 .start
= smc_pnet_dump_start
436 .cmd
= SMC_PNETID_ADD
,
437 .flags
= GENL_ADMIN_PERM
,
438 .policy
= smc_pnet_policy
,
442 .cmd
= SMC_PNETID_DEL
,
443 .flags
= GENL_ADMIN_PERM
,
444 .policy
= smc_pnet_policy
,
448 .cmd
= SMC_PNETID_FLUSH
,
449 .flags
= GENL_ADMIN_PERM
,
450 .policy
= smc_pnet_policy
,
451 .doit
= smc_pnet_flush
455 /* SMC_PNETID family definition */
456 static struct genl_family smc_pnet_nl_family
= {
458 .name
= SMCR_GENL_FAMILY_NAME
,
459 .version
= SMCR_GENL_FAMILY_VERSION
,
460 .maxattr
= SMC_PNETID_MAX
,
462 .module
= THIS_MODULE
,
464 .n_ops
= ARRAY_SIZE(smc_pnet_ops
)
467 static int smc_pnet_netdev_event(struct notifier_block
*this,
468 unsigned long event
, void *ptr
)
470 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
474 case NETDEV_UNREGISTER
:
475 smc_pnet_remove_by_ndev(event_dev
);
482 static struct notifier_block smc_netdev_notifier
= {
483 .notifier_call
= smc_pnet_netdev_event
486 int __init
smc_pnet_init(void)
490 rc
= genl_register_family(&smc_pnet_nl_family
);
493 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
495 genl_unregister_family(&smc_pnet_nl_family
);
499 void smc_pnet_exit(void)
501 smc_pnet_flush(NULL
, NULL
);
502 unregister_netdevice_notifier(&smc_netdev_notifier
);
503 genl_unregister_family(&smc_pnet_nl_family
);
506 /* PNET table analysis for a given sock:
507 * determine ib_device and port belonging to used internal TCP socket
508 * ethernet interface.
510 void smc_pnet_find_roce_resource(struct sock
*sk
,
511 struct smc_ib_device
**smcibdev
, u8
*ibport
)
513 struct dst_entry
*dst
= sk_dst_get(sk
);
514 struct smc_pnetentry
*pnetelem
;
523 read_lock(&smc_pnettable
.lock
);
524 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
525 if (dst
->dev
== pnetelem
->ndev
) {
526 if (smc_ib_port_active(pnetelem
->smcibdev
,
527 pnetelem
->ib_port
)) {
528 *smcibdev
= pnetelem
->smcibdev
;
529 *ibport
= pnetelem
->ib_port
;
534 read_unlock(&smc_pnettable
.lock
);