/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
/*
 * This determines whether a non-privileged user is allowed to specify a
 * controlled QKEY or not; when true, non-privileged users are allowed to
 * specify a controlled QKEY.
 */
static bool privileged_qkey;

typedef int (*res_fill_func_t)(struct sk_buff*, bool,
			       struct rdma_restrack_entry*, uint32_t);
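
/*
 * nldev_policy describes the type and maximum length of every attribute that
 * may appear in an RDMA_NL_NLDEV request; it is passed to the
 * nlmsg_parse()/__nlmsg_parse() calls in the handlers below.
 */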
/*
 * Sort array elements by the netlink attribute name
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
	[RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
					.len = IB_DEVICE_NAME_MAX },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
	[RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
	[RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
	[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
	[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
	[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
};
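
/*
 * Helpers used by drivers to emit RDMA_NLDEV_ATTR_DRIVER_* key/value pairs:
 * each value is preceded by its name string and, for the _hex variants, an
 * explicit print type.
 */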
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
				      enum rdma_nldev_print_type print_type)
{
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
		return -EMSGSIZE;
	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
		return -EMSGSIZE;

	return 0;
}

static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u32 value)
{
	if (put_driver_name_print_type(msg, name, print_type))
		return -EMSGSIZE;
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
		return -EMSGSIZE;

	return 0;
}

static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u64 value)
{
	if (put_driver_name_print_type(msg, name, print_type))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}

int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
			      const char *str)
{
	if (put_driver_name_print_type(msg, name,
				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
		return -EMSGSIZE;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
		return -EMSGSIZE;

	return 0;
}
EXPORT_SYMBOL(rdma_nl_put_driver_string);

int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);

int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);

int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);

int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
bool rdma_nl_get_privileged_qkey(void)
{
	return privileged_qkey || capable(CAP_NET_RAW);
}
EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
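
/*
 * Put the device index and name attributes that identify an ib_device in
 * every nldev reply.
 */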
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
		return -EMSGSIZE;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
			   dev_name(&device->dev)))
		return -EMSGSIZE;

	return 0;
}
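
/*
 * Fill the device-wide attributes (capability flags, GUIDs, FW version, node
 * type, parent device and protocol string) returned for RDMA_NLDEV_CMD_GET.
 */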
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];
	int ret = 0;
	u32 port;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
		return -EMSGSIZE;

	if (device->type &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
		return -EMSGSIZE;

	if (device->parent &&
	    nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
			   dev_name(&device->parent->dev)))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
		       device->name_assign_type))
		return -EMSGSIZE;

	/*
	 * Link type is determined on first port and mlx4 device
	 * which can potentially have two different link type for the same
	 * IB device is considered as better to be avoided in the future,
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	return ret;
}
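
/*
 * Fill per-port attributes. IB-specific fields (LID, SM LID, LMC, subnet
 * prefix) are only emitted when the port runs the IB protocol, and netdev
 * details only when the netdev belongs to the requesting net namespace.
 */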
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	if (rdma_protocol_ib(device, port)) {
		u64 cap_flags;

		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
			      sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	dev_put(netdev);
	return ret;
}
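
/*
 * One nested RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY carries a resource name
 * ("qp", "cq", ...) and its current object count.
 */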
static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start_noflag(msg,
					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
			      RDMA_NLDEV_ATTR_PAD))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}
static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
			 bool show_details)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
		[RDMA_RESTRACK_CTX] = "ctx",
		[RDMA_RESTRACK_SRQ] = "srq",
	};

	struct nlattr *table_attr;
	int ret, i, curr;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		if (!names[i])
			continue;
		curr = rdma_restrack_count(device, i, show_details);
		ret = fill_res_info_entry(msg, names[i], curr);
		if (ret)
			goto err;
	}

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return ret;
}
static int fill_res_name_pid(struct sk_buff *msg,
			     struct rdma_restrack_entry *res)
{
	int err = 0;

	/*
	 * For user resources, user should read /proc/PID/comm to get the
	 * name of the owning task.
	 */
	if (rdma_is_kernel_res(res)) {
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
				     res->kern_name);
	} else {
		pid_t pid;

		pid = task_pid_vnr(res->task);
		/*
		 * Task is dead and in zombie state.
		 * There is no need to print PID anymore.
		 */
		if (pid)
			/*
			 * This part is racy, task can be killed and PID will
			 * be zero right here but it is ok, next query won't
			 * return PID. We don't promise real-time reflection
			 * of SW objects.
			 */
			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
	}

	return err ? -EMSGSIZE : 0;
}
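
/*
 * Query the QP and report its remote QPN, PSNs, path migration state, type
 * and state; the driver may append vendor-specific attributes via its
 * fill_res_qp_entry op.
 */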
static int fill_res_qp_entry_query(struct sk_buff *msg,
				   struct rdma_restrack_entry *res,
				   struct ib_device *dev,
				   struct ib_qp *qp)
{
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	if (dev->ops.fill_res_qp_entry)
		return dev->ops.fill_res_qp_entry(msg, qp);
	return 0;

err:	return -EMSGSIZE;
}
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;
	int ret;

	if (port && port != qp->port)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
		return -EMSGSIZE;

	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
	if (ret)
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		return -EMSGSIZE;

	ret = fill_res_name_pid(msg, res);
	if (ret)
		return -EMSGSIZE;

	return fill_res_qp_entry_query(msg, res, dev, qp);
}
static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;
	if (!dev->ops.fill_res_qp_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_qp_entry_raw(msg, qp);
}
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct ib_device *dev = id_priv->id.device;
	struct rdma_cm_id *cm_id = &id_priv->id;

	if (port && port != cm_id->port_num)
		return -EAGAIN;

	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (dev->ops.fill_res_cm_id_entry)
		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
	return 0;

err:	return -EMSGSIZE;
}
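
/*
 * Report CQ depth, reference count, poll context (kernel CQs only), DIM state
 * and owning context before handing off to the driver's fill_res_cq_entry op.
 */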
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		return -EMSGSIZE;
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->uevent.uobject.context->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return (dev->ops.fill_res_cq_entry) ?
		dev->ops.fill_res_cq_entry(msg, cq) : 0;
}
static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (!dev->ops.fill_res_cq_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_cq_entry_raw(msg, cq);
}
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			return -EMSGSIZE;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return (dev->ops.fill_res_mr_entry) ?
		       dev->ops.fill_res_mr_entry(msg, mr) :
		       0;
}
static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (!dev->ops.fill_res_mr_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_mr_entry_raw(msg, mr);
}
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		goto err;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			pd->uobject->context->res.id))
		goto err;

	return fill_res_name_pid(msg, res);

err:	return -EMSGSIZE;
}
static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);

	if (rdma_is_kernel_res(res))
		return 0;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
		return -EMSGSIZE;

	return fill_res_name_pid(msg, res);
}
765 static int fill_res_range_qp_entry(struct sk_buff
*msg
, uint32_t min_range
,
768 struct nlattr
*entry_attr
;
773 entry_attr
= nla_nest_start(msg
, RDMA_NLDEV_ATTR_RES_QP_ENTRY
);
777 if (min_range
== max_range
) {
778 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_LQPN
, min_range
))
781 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_MIN_RANGE
, min_range
))
783 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_MAX_RANGE
, max_range
))
786 nla_nest_end(msg
, entry_attr
);
790 nla_nest_cancel(msg
, entry_attr
);
794 static int fill_res_srq_qps(struct sk_buff
*msg
, struct ib_srq
*srq
)
796 uint32_t min_range
= 0, prev
= 0;
797 struct rdma_restrack_entry
*res
;
798 struct rdma_restrack_root
*rt
;
799 struct nlattr
*table_attr
;
800 struct ib_qp
*qp
= NULL
;
801 unsigned long id
= 0;
803 table_attr
= nla_nest_start(msg
, RDMA_NLDEV_ATTR_RES_QP
);
807 rt
= &srq
->device
->res
[RDMA_RESTRACK_QP
];
809 xa_for_each(&rt
->xa
, id
, res
) {
810 if (!rdma_restrack_get(res
))
813 qp
= container_of(res
, struct ib_qp
, res
);
814 if (!qp
->srq
|| (qp
->srq
->res
.id
!= srq
->res
.id
)) {
815 rdma_restrack_put(res
);
819 if (qp
->qp_num
< prev
)
820 /* qp_num should be ascending */
823 if (min_range
== 0) {
824 min_range
= qp
->qp_num
;
825 } else if (qp
->qp_num
> (prev
+ 1)) {
826 if (fill_res_range_qp_entry(msg
, min_range
, prev
))
829 min_range
= qp
->qp_num
;
832 rdma_restrack_put(res
);
837 if (fill_res_range_qp_entry(msg
, min_range
, prev
))
840 nla_nest_end(msg
, table_attr
);
844 rdma_restrack_put(res
);
847 nla_nest_cancel(msg
, table_attr
);
851 static int fill_res_srq_entry(struct sk_buff
*msg
, bool has_cap_net_admin
,
852 struct rdma_restrack_entry
*res
, uint32_t port
)
854 struct ib_srq
*srq
= container_of(res
, struct ib_srq
, res
);
855 struct ib_device
*dev
= srq
->device
;
857 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_SRQN
, srq
->res
.id
))
860 if (nla_put_u8(msg
, RDMA_NLDEV_ATTR_RES_TYPE
, srq
->srq_type
))
863 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_PDN
, srq
->pd
->res
.id
))
866 if (ib_srq_has_cq(srq
->srq_type
)) {
867 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_CQN
,
868 srq
->ext
.cq
->res
.id
))
872 if (fill_res_srq_qps(msg
, srq
))
875 if (fill_res_name_pid(msg
, res
))
878 if (dev
->ops
.fill_res_srq_entry
)
879 return dev
->ops
.fill_res_srq_entry(msg
, srq
);
887 static int fill_res_srq_raw_entry(struct sk_buff
*msg
, bool has_cap_net_admin
,
888 struct rdma_restrack_entry
*res
, uint32_t port
)
890 struct ib_srq
*srq
= container_of(res
, struct ib_srq
, res
);
891 struct ib_device
*dev
= srq
->device
;
893 if (!dev
->ops
.fill_res_srq_entry_raw
)
895 return dev
->ops
.fill_res_srq_entry_raw(msg
, srq
);
898 static int fill_stat_counter_mode(struct sk_buff
*msg
,
899 struct rdma_counter
*counter
)
901 struct rdma_counter_mode
*m
= &counter
->mode
;
903 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_STAT_MODE
, m
->mode
))
906 if (m
->mode
== RDMA_COUNTER_MODE_AUTO
) {
907 if ((m
->mask
& RDMA_COUNTER_MASK_QP_TYPE
) &&
908 nla_put_u8(msg
, RDMA_NLDEV_ATTR_RES_TYPE
, m
->param
.qp_type
))
911 if ((m
->mask
& RDMA_COUNTER_MASK_PID
) &&
912 fill_res_name_pid(msg
, &counter
->res
))
919 static int fill_stat_counter_qp_entry(struct sk_buff
*msg
, u32 qpn
)
921 struct nlattr
*entry_attr
;
923 entry_attr
= nla_nest_start(msg
, RDMA_NLDEV_ATTR_RES_QP_ENTRY
);
927 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_LQPN
, qpn
))
930 nla_nest_end(msg
, entry_attr
);
934 nla_nest_cancel(msg
, entry_attr
);
938 static int fill_stat_counter_qps(struct sk_buff
*msg
,
939 struct rdma_counter
*counter
)
941 struct rdma_restrack_entry
*res
;
942 struct rdma_restrack_root
*rt
;
943 struct nlattr
*table_attr
;
944 struct ib_qp
*qp
= NULL
;
945 unsigned long id
= 0;
948 table_attr
= nla_nest_start(msg
, RDMA_NLDEV_ATTR_RES_QP
);
952 rt
= &counter
->device
->res
[RDMA_RESTRACK_QP
];
954 xa_for_each(&rt
->xa
, id
, res
) {
955 qp
= container_of(res
, struct ib_qp
, res
);
956 if (!qp
->counter
|| (qp
->counter
->id
!= counter
->id
))
959 ret
= fill_stat_counter_qp_entry(msg
, qp
->qp_num
);
965 nla_nest_end(msg
, table_attr
);
970 nla_nest_cancel(msg
, table_attr
);
974 int rdma_nl_stat_hwcounter_entry(struct sk_buff
*msg
, const char *name
,
977 struct nlattr
*entry_attr
;
979 entry_attr
= nla_nest_start(msg
, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY
);
983 if (nla_put_string(msg
, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME
,
986 if (nla_put_u64_64bit(msg
, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE
,
987 value
, RDMA_NLDEV_ATTR_PAD
))
990 nla_nest_end(msg
, entry_attr
);
994 nla_nest_cancel(msg
, entry_attr
);
997 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry
);
999 static int fill_stat_mr_entry(struct sk_buff
*msg
, bool has_cap_net_admin
,
1000 struct rdma_restrack_entry
*res
, uint32_t port
)
1002 struct ib_mr
*mr
= container_of(res
, struct ib_mr
, res
);
1003 struct ib_device
*dev
= mr
->pd
->device
;
1005 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_MRN
, res
->id
))
1008 if (dev
->ops
.fill_stat_mr_entry
)
1009 return dev
->ops
.fill_stat_mr_entry(msg
, mr
);
1016 static int fill_stat_counter_hwcounters(struct sk_buff
*msg
,
1017 struct rdma_counter
*counter
)
1019 struct rdma_hw_stats
*st
= counter
->stats
;
1020 struct nlattr
*table_attr
;
1023 table_attr
= nla_nest_start(msg
, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS
);
1027 mutex_lock(&st
->lock
);
1028 for (i
= 0; i
< st
->num_counters
; i
++) {
1029 if (test_bit(i
, st
->is_disabled
))
1031 if (rdma_nl_stat_hwcounter_entry(msg
, st
->descs
[i
].name
,
1035 mutex_unlock(&st
->lock
);
1037 nla_nest_end(msg
, table_attr
);
1041 mutex_unlock(&st
->lock
);
1042 nla_nest_cancel(msg
, table_attr
);
1046 static int fill_res_counter_entry(struct sk_buff
*msg
, bool has_cap_net_admin
,
1047 struct rdma_restrack_entry
*res
,
1050 struct rdma_counter
*counter
=
1051 container_of(res
, struct rdma_counter
, res
);
1053 if (port
&& port
!= counter
->port
)
1056 /* Dump it even query failed */
1057 rdma_counter_query_stats(counter
);
1059 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_PORT_INDEX
, counter
->port
) ||
1060 nla_put_u32(msg
, RDMA_NLDEV_ATTR_STAT_COUNTER_ID
, counter
->id
) ||
1061 fill_stat_counter_mode(msg
, counter
) ||
1062 fill_stat_counter_qps(msg
, counter
) ||
1063 fill_stat_counter_hwcounters(msg
, counter
))
1069 static int nldev_get_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1070 struct netlink_ext_ack
*extack
)
1072 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1073 struct ib_device
*device
;
1074 struct sk_buff
*msg
;
1078 err
= __nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1079 nldev_policy
, NL_VALIDATE_LIBERAL
, extack
);
1080 if (err
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
])
1083 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1085 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1089 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1095 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
1096 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
, RDMA_NLDEV_CMD_GET
),
1103 err
= fill_dev_info(msg
, device
);
1107 nlmsg_end(msg
, nlh
);
1109 ib_device_put(device
);
1110 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
1115 ib_device_put(device
);
1119 static int nldev_set_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1120 struct netlink_ext_ack
*extack
)
1122 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1123 struct ib_device
*device
;
1127 err
= nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1128 nldev_policy
, extack
);
1129 if (err
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
])
1132 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1133 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1137 if (tb
[RDMA_NLDEV_ATTR_DEV_NAME
]) {
1138 char name
[IB_DEVICE_NAME_MAX
] = {};
1140 nla_strscpy(name
, tb
[RDMA_NLDEV_ATTR_DEV_NAME
],
1141 IB_DEVICE_NAME_MAX
);
1142 if (strlen(name
) == 0) {
1146 err
= ib_device_rename(device
, name
);
1150 if (tb
[RDMA_NLDEV_NET_NS_FD
]) {
1153 ns_fd
= nla_get_u32(tb
[RDMA_NLDEV_NET_NS_FD
]);
1154 err
= ib_device_set_netns_put(skb
, device
, ns_fd
);
1158 if (tb
[RDMA_NLDEV_ATTR_DEV_DIM
]) {
1161 use_dim
= nla_get_u8(tb
[RDMA_NLDEV_ATTR_DEV_DIM
]);
1162 err
= ib_device_set_dim(device
, use_dim
);
1167 ib_device_put(device
);
1172 static int _nldev_get_dumpit(struct ib_device
*device
,
1173 struct sk_buff
*skb
,
1174 struct netlink_callback
*cb
,
1177 int start
= cb
->args
[0];
1178 struct nlmsghdr
*nlh
;
1183 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
1184 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
, RDMA_NLDEV_CMD_GET
),
1187 if (!nlh
|| fill_dev_info(skb
, device
)) {
1188 nlmsg_cancel(skb
, nlh
);
1192 nlmsg_end(skb
, nlh
);
1196 out
: cb
->args
[0] = idx
;
1200 static int nldev_get_dumpit(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1203 * There is no need to take lock, because
1204 * we are relying on ib_core's locking.
1206 return ib_enum_all_devs(_nldev_get_dumpit
, skb
, cb
);
1209 static int nldev_port_get_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1210 struct netlink_ext_ack
*extack
)
1212 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1213 struct ib_device
*device
;
1214 struct sk_buff
*msg
;
1219 err
= __nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1220 nldev_policy
, NL_VALIDATE_LIBERAL
, extack
);
1222 !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
] ||
1223 !tb
[RDMA_NLDEV_ATTR_PORT_INDEX
])
1226 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1227 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1231 port
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]);
1232 if (!rdma_is_port_valid(device
, port
)) {
1237 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1243 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
1244 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
, RDMA_NLDEV_CMD_GET
),
1251 err
= fill_port_info(msg
, device
, port
, sock_net(skb
->sk
));
1255 nlmsg_end(msg
, nlh
);
1256 ib_device_put(device
);
1258 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
1263 ib_device_put(device
);
1267 static int nldev_port_get_dumpit(struct sk_buff
*skb
,
1268 struct netlink_callback
*cb
)
1270 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1271 struct ib_device
*device
;
1272 int start
= cb
->args
[0];
1273 struct nlmsghdr
*nlh
;
1279 err
= __nlmsg_parse(cb
->nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1280 nldev_policy
, NL_VALIDATE_LIBERAL
, NULL
);
1281 if (err
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
])
1284 ifindex
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1285 device
= ib_device_get_by_index(sock_net(skb
->sk
), ifindex
);
1289 rdma_for_each_port (device
, p
) {
1291 * The dumpit function returns all information from specific
1292 * index. This specific index is taken from the netlink
1293 * messages request sent by user and it is available
1296 * Usually, the user doesn't fill this field and it causes
1297 * to return everything.
1305 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
1307 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
,
1308 RDMA_NLDEV_CMD_PORT_GET
),
1311 if (!nlh
|| fill_port_info(skb
, device
, p
, sock_net(skb
->sk
))) {
1312 nlmsg_cancel(skb
, nlh
);
1316 nlmsg_end(skb
, nlh
);
1320 ib_device_put(device
);
1325 static int nldev_res_get_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1326 struct netlink_ext_ack
*extack
)
1328 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1329 bool show_details
= false;
1330 struct ib_device
*device
;
1331 struct sk_buff
*msg
;
1335 ret
= __nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1336 nldev_policy
, NL_VALIDATE_LIBERAL
, extack
);
1337 if (ret
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
])
1340 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1341 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1345 if (tb
[RDMA_NLDEV_ATTR_DRIVER_DETAILS
])
1346 show_details
= nla_get_u8(tb
[RDMA_NLDEV_ATTR_DRIVER_DETAILS
]);
1348 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1354 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
1355 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
, RDMA_NLDEV_CMD_RES_GET
),
1362 ret
= fill_res_info(msg
, device
, show_details
);
1366 nlmsg_end(msg
, nlh
);
1367 ib_device_put(device
);
1368 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
1373 ib_device_put(device
);
1377 static int _nldev_res_get_dumpit(struct ib_device
*device
,
1378 struct sk_buff
*skb
,
1379 struct netlink_callback
*cb
,
1382 int start
= cb
->args
[0];
1383 struct nlmsghdr
*nlh
;
1388 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
1389 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
, RDMA_NLDEV_CMD_RES_GET
),
1392 if (!nlh
|| fill_res_info(skb
, device
, false)) {
1393 nlmsg_cancel(skb
, nlh
);
1396 nlmsg_end(skb
, nlh
);
static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

struct nldev_fill_res_entry {
	enum rdma_nldev_attr nldev_attr;
	u8 flags;
	u32 entry;
	u32 id;
};

enum nldev_res_flags {
	NLDEV_PER_DEV = 1 << 0,
};

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
	[RDMA_RESTRACK_CTX] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CTXN,
	},
	[RDMA_RESTRACK_SRQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_SRQN,
	},
};
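
/*
 * Common doit handler for all restrack object types: look up the device and
 * the object by the per-type id attribute from fill_entries, then let the
 * matching fill_func format the reply.
 */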
1471 static int res_get_common_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1472 struct netlink_ext_ack
*extack
,
1473 enum rdma_restrack_type res_type
,
1474 res_fill_func_t fill_func
)
1476 const struct nldev_fill_res_entry
*fe
= &fill_entries
[res_type
];
1477 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1478 struct rdma_restrack_entry
*res
;
1479 struct ib_device
*device
;
1480 u32 index
, id
, port
= 0;
1481 bool has_cap_net_admin
;
1482 struct sk_buff
*msg
;
1485 ret
= __nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1486 nldev_policy
, NL_VALIDATE_LIBERAL
, extack
);
1487 if (ret
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
] || !fe
->id
|| !tb
[fe
->id
])
1490 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1491 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1495 if (tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]) {
1496 port
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]);
1497 if (!rdma_is_port_valid(device
, port
)) {
1503 if ((port
&& fe
->flags
& NLDEV_PER_DEV
) ||
1504 (!port
&& ~fe
->flags
& NLDEV_PER_DEV
)) {
1509 id
= nla_get_u32(tb
[fe
->id
]);
1510 res
= rdma_restrack_get_byid(device
, res_type
, id
);
1516 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1522 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
1523 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
,
1524 RDMA_NL_GET_OP(nlh
->nlmsg_type
)),
1527 if (!nlh
|| fill_nldev_handle(msg
, device
)) {
1532 has_cap_net_admin
= netlink_capable(skb
, CAP_NET_ADMIN
);
1534 ret
= fill_func(msg
, has_cap_net_admin
, res
, port
);
1538 rdma_restrack_put(res
);
1539 nlmsg_end(msg
, nlh
);
1540 ib_device_put(device
);
1541 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
1546 rdma_restrack_put(res
);
1548 ib_device_put(device
);
1552 static int res_get_common_dumpit(struct sk_buff
*skb
,
1553 struct netlink_callback
*cb
,
1554 enum rdma_restrack_type res_type
,
1555 res_fill_func_t fill_func
)
1557 const struct nldev_fill_res_entry
*fe
= &fill_entries
[res_type
];
1558 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1559 struct rdma_restrack_entry
*res
;
1560 struct rdma_restrack_root
*rt
;
1561 int err
, ret
= 0, idx
= 0;
1562 bool show_details
= false;
1563 struct nlattr
*table_attr
;
1564 struct nlattr
*entry_attr
;
1565 struct ib_device
*device
;
1566 int start
= cb
->args
[0];
1567 bool has_cap_net_admin
;
1568 struct nlmsghdr
*nlh
;
1570 u32 index
, port
= 0;
1571 bool filled
= false;
1573 err
= __nlmsg_parse(cb
->nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1574 nldev_policy
, NL_VALIDATE_LIBERAL
, NULL
);
1576 * Right now, we are expecting the device index to get res information,
1577 * but it is possible to extend this code to return all devices in
1578 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
1579 * if it doesn't exist, we will iterate over all devices.
1581 * But it is not needed for now.
1583 if (err
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
])
1586 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1587 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1591 if (tb
[RDMA_NLDEV_ATTR_DRIVER_DETAILS
])
1592 show_details
= nla_get_u8(tb
[RDMA_NLDEV_ATTR_DRIVER_DETAILS
]);
1595 * If no PORT_INDEX is supplied, we will return all QPs from that device
1597 if (tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]) {
1598 port
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]);
1599 if (!rdma_is_port_valid(device
, port
)) {
1605 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
1606 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
,
1607 RDMA_NL_GET_OP(cb
->nlh
->nlmsg_type
)),
1610 if (!nlh
|| fill_nldev_handle(skb
, device
)) {
1615 table_attr
= nla_nest_start_noflag(skb
, fe
->nldev_attr
);
1621 has_cap_net_admin
= netlink_capable(cb
->skb
, CAP_NET_ADMIN
);
1623 rt
= &device
->res
[res_type
];
1626 * FIXME: if the skip ahead is something common this loop should
1627 * use xas_for_each & xas_pause to optimize, we can have a lot of
1630 xa_for_each(&rt
->xa
, id
, res
) {
1631 if (xa_get_mark(&rt
->xa
, res
->id
, RESTRACK_DD
) && !show_details
)
1634 if (idx
< start
|| !rdma_restrack_get(res
))
1641 entry_attr
= nla_nest_start_noflag(skb
, fe
->entry
);
1644 rdma_restrack_put(res
);
1648 ret
= fill_func(skb
, has_cap_net_admin
, res
, port
);
1650 rdma_restrack_put(res
);
1653 nla_nest_cancel(skb
, entry_attr
);
1654 if (ret
== -EMSGSIZE
)
1660 nla_nest_end(skb
, entry_attr
);
1661 again
: xa_lock(&rt
->xa
);
1667 nla_nest_end(skb
, table_attr
);
1668 nlmsg_end(skb
, nlh
);
1672 * No more entries to fill, cancel the message and
1673 * return 0 to mark end of dumpit.
1678 ib_device_put(device
);
1682 nla_nest_cancel(skb
, table_attr
);
1685 nlmsg_cancel(skb
, nlh
);
1688 ib_device_put(device
);
#define RES_GET_FUNCS(name, type)					       \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,	       \
						 struct netlink_callback *cb)  \
	{								       \
		return res_get_common_dumpit(skb, cb, type,		       \
					     fill_res_##name##_entry);	       \
	}								       \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,	       \
					       struct nlmsghdr *nlh,	       \
					       struct netlink_ext_ack *extack) \
	{								       \
		return res_get_common_doit(skb, nlh, extack, type,	       \
					   fill_res_##name##_entry);	       \
	}

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
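
/*
 * Registry of link type providers used by the RDMA_NLDEV_CMD_NEWLINK handler
 * below; link drivers add and remove themselves with rdma_link_register()
 * and rdma_link_unregister().
 */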
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);

static const struct rdma_link_ops *link_ops_get(const char *type)
{
	const struct rdma_link_ops *ops;

	list_for_each_entry(ops, &link_ops, list) {
		if (!strcmp(ops->type, type))
			goto out;
	}
	ops = NULL;
out:
	return ops;
}

void rdma_link_register(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	if (WARN_ON_ONCE(link_ops_get(ops->type)))
		goto out;
	list_add(&ops->list, &link_ops);
out:
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_register);

void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	list_del(&ops->list);
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
1755 static int nldev_newlink(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1756 struct netlink_ext_ack
*extack
)
1758 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1759 char ibdev_name
[IB_DEVICE_NAME_MAX
];
1760 const struct rdma_link_ops
*ops
;
1761 char ndev_name
[IFNAMSIZ
];
1762 struct net_device
*ndev
;
1763 char type
[IFNAMSIZ
];
1766 err
= nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1767 nldev_policy
, extack
);
1768 if (err
|| !tb
[RDMA_NLDEV_ATTR_DEV_NAME
] ||
1769 !tb
[RDMA_NLDEV_ATTR_LINK_TYPE
] || !tb
[RDMA_NLDEV_ATTR_NDEV_NAME
])
1772 nla_strscpy(ibdev_name
, tb
[RDMA_NLDEV_ATTR_DEV_NAME
],
1773 sizeof(ibdev_name
));
1774 if (strchr(ibdev_name
, '%') || strlen(ibdev_name
) == 0)
1777 nla_strscpy(type
, tb
[RDMA_NLDEV_ATTR_LINK_TYPE
], sizeof(type
));
1778 nla_strscpy(ndev_name
, tb
[RDMA_NLDEV_ATTR_NDEV_NAME
],
1781 ndev
= dev_get_by_name(sock_net(skb
->sk
), ndev_name
);
1785 down_read(&link_ops_rwsem
);
1786 ops
= link_ops_get(type
);
1787 #ifdef CONFIG_MODULES
1789 up_read(&link_ops_rwsem
);
1790 request_module("rdma-link-%s", type
);
1791 down_read(&link_ops_rwsem
);
1792 ops
= link_ops_get(type
);
1795 err
= ops
? ops
->newlink(ibdev_name
, ndev
) : -EINVAL
;
1796 up_read(&link_ops_rwsem
);
1802 static int nldev_dellink(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1803 struct netlink_ext_ack
*extack
)
1805 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1806 struct ib_device
*device
;
1810 err
= nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1811 nldev_policy
, extack
);
1812 if (err
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
])
1815 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1816 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1820 if (!(device
->attrs
.kernel_cap_flags
& IBK_ALLOW_USER_UNREG
)) {
1821 ib_device_put(device
);
1825 ib_unregister_device_and_put(device
);
1829 static int nldev_get_chardev(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1830 struct netlink_ext_ack
*extack
)
1832 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1833 char client_name
[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE
];
1834 struct ib_client_nl_info data
= {};
1835 struct ib_device
*ibdev
= NULL
;
1836 struct sk_buff
*msg
;
1840 err
= __nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1, nldev_policy
,
1841 NL_VALIDATE_LIBERAL
, extack
);
1842 if (err
|| !tb
[RDMA_NLDEV_ATTR_CHARDEV_TYPE
])
1845 nla_strscpy(client_name
, tb
[RDMA_NLDEV_ATTR_CHARDEV_TYPE
],
1846 sizeof(client_name
));
1848 if (tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]) {
1849 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
1850 ibdev
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
1854 if (tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]) {
1855 data
.port
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]);
1856 if (!rdma_is_port_valid(ibdev
, data
.port
)) {
1863 } else if (tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]) {
1867 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1872 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
1873 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
,
1874 RDMA_NLDEV_CMD_GET_CHARDEV
),
1882 err
= ib_get_client_nl_info(ibdev
, client_name
, &data
);
1886 err
= nla_put_u64_64bit(msg
, RDMA_NLDEV_ATTR_CHARDEV
,
1887 huge_encode_dev(data
.cdev
->devt
),
1888 RDMA_NLDEV_ATTR_PAD
);
1891 err
= nla_put_u64_64bit(msg
, RDMA_NLDEV_ATTR_CHARDEV_ABI
, data
.abi
,
1892 RDMA_NLDEV_ATTR_PAD
);
1895 if (nla_put_string(msg
, RDMA_NLDEV_ATTR_CHARDEV_NAME
,
1896 dev_name(data
.cdev
))) {
1901 nlmsg_end(msg
, nlh
);
1902 put_device(data
.cdev
);
1904 ib_device_put(ibdev
);
1905 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
1908 put_device(data
.cdev
);
1913 ib_device_put(ibdev
);
1917 static int nldev_sys_get_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1918 struct netlink_ext_ack
*extack
)
1920 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
1921 struct sk_buff
*msg
;
1924 err
= __nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
1925 nldev_policy
, NL_VALIDATE_LIBERAL
, extack
);
1929 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
1933 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
1934 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
,
1935 RDMA_NLDEV_CMD_SYS_GET
),
1942 err
= nla_put_u8(msg
, RDMA_NLDEV_SYS_ATTR_NETNS_MODE
,
1943 (u8
)ib_devices_shared_netns
);
1949 err
= nla_put_u8(msg
, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE
,
1950 (u8
)privileged_qkey
);
1956 err
= nla_put_u8(msg
, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE
, 1);
1962 * Copy-on-fork is supported.
1964 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1965 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1966 * for more details. Don't backport this without them.
1968 * Return value ignored on purpose, assume copy-on-fork is not
1969 * supported in case of failure.
1971 nla_put_u8(msg
, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK
, 1);
1973 nlmsg_end(msg
, nlh
);
1974 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
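
/*
 * RDMA_NLDEV_CMD_SYS_SET handlers: toggle the shared/exclusive net namespace
 * mode and the privileged-QKEY mode; both accept only 0 or 1.
 */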
static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
{
	u8 enable;
	int err;

	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
	/* Only 0 and 1 are supported */
	if (enable > 1)
		return -EINVAL;

	err = rdma_compatdev_set(enable);
	return err;
}

static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
{
	u8 enable;

	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
	/* Only 0 and 1 are supported */
	if (enable > 1)
		return -EINVAL;

	privileged_qkey = enable;
	return 0;
}

static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
				  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err)
		return -EINVAL;

	if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
		return nldev_set_sys_set_netns_doit(tb);

	if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
		return nldev_set_sys_set_pqkey_doit(tb);

	return -EINVAL;
}
*msg
,
2026 struct netlink_ext_ack
*extack
,
2027 struct nlattr
*tb
[],
2028 struct ib_device
*device
, u32 port
)
2030 u32 mode
, mask
= 0, qpn
, cntn
= 0;
2033 /* Currently only counter for QP is supported */
2034 if (!tb
[RDMA_NLDEV_ATTR_STAT_RES
] ||
2035 nla_get_u32(tb
[RDMA_NLDEV_ATTR_STAT_RES
]) != RDMA_NLDEV_ATTR_RES_QP
)
2038 mode
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_STAT_MODE
]);
2039 if (mode
== RDMA_COUNTER_MODE_AUTO
) {
2040 if (tb
[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK
])
2042 tb
[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK
]);
2043 return rdma_counter_set_auto_mode(device
, port
, mask
, extack
);
2046 if (!tb
[RDMA_NLDEV_ATTR_RES_LQPN
])
2049 qpn
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_RES_LQPN
]);
2050 if (tb
[RDMA_NLDEV_ATTR_STAT_COUNTER_ID
]) {
2051 cntn
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_STAT_COUNTER_ID
]);
2052 ret
= rdma_counter_bind_qpn(device
, port
, qpn
, cntn
);
2056 ret
= rdma_counter_bind_qpn_alloc(device
, port
, qpn
, &cntn
);
2061 if (nla_put_u32(msg
, RDMA_NLDEV_ATTR_STAT_COUNTER_ID
, cntn
) ||
2062 nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_LQPN
, qpn
)) {
2070 rdma_counter_unbind_qpn(device
, port
, qpn
, cntn
);
2074 static int nldev_stat_set_counter_dynamic_doit(struct nlattr
*tb
[],
2075 struct ib_device
*device
,
2078 struct rdma_hw_stats
*stats
;
2079 struct nlattr
*entry_attr
;
2080 unsigned long *target
;
2081 int rem
, i
, ret
= 0;
2084 stats
= ib_get_hw_stats_port(device
, port
);
2088 target
= kcalloc(BITS_TO_LONGS(stats
->num_counters
),
2089 sizeof(*stats
->is_disabled
), GFP_KERNEL
);
2093 nla_for_each_nested(entry_attr
, tb
[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS
],
2095 index
= nla_get_u32(entry_attr
);
2096 if ((index
>= stats
->num_counters
) ||
2097 !(stats
->descs
[index
].flags
& IB_STAT_FLAG_OPTIONAL
)) {
2102 set_bit(index
, target
);
2105 for (i
= 0; i
< stats
->num_counters
; i
++) {
2106 if (!(stats
->descs
[i
].flags
& IB_STAT_FLAG_OPTIONAL
))
2109 ret
= rdma_counter_modify(device
, port
, i
, test_bit(i
, target
));
2119 static int nldev_stat_set_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2120 struct netlink_ext_ack
*extack
)
2122 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
2123 struct ib_device
*device
;
2124 struct sk_buff
*msg
;
2128 ret
= nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1, nldev_policy
,
2130 if (ret
|| !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
] ||
2131 !tb
[RDMA_NLDEV_ATTR_PORT_INDEX
])
2134 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
2135 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
2139 port
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]);
2140 if (!rdma_is_port_valid(device
, port
)) {
2142 goto err_put_device
;
2145 if (!tb
[RDMA_NLDEV_ATTR_STAT_MODE
] &&
2146 !tb
[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS
]) {
2148 goto err_put_device
;
2151 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
2154 goto err_put_device
;
2156 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
2157 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
,
2158 RDMA_NLDEV_CMD_STAT_SET
),
2160 if (!nlh
|| fill_nldev_handle(msg
, device
) ||
2161 nla_put_u32(msg
, RDMA_NLDEV_ATTR_PORT_INDEX
, port
)) {
2166 if (tb
[RDMA_NLDEV_ATTR_STAT_MODE
]) {
2167 ret
= nldev_stat_set_mode_doit(msg
, extack
, tb
, device
, port
);
2172 if (tb
[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS
]) {
2173 ret
= nldev_stat_set_counter_dynamic_doit(tb
, device
, port
);
2178 nlmsg_end(msg
, nlh
);
2179 ib_device_put(device
);
2180 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
2185 ib_device_put(device
);
2189 static int nldev_stat_del_doit(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2190 struct netlink_ext_ack
*extack
)
2192 struct nlattr
*tb
[RDMA_NLDEV_ATTR_MAX
];
2193 struct ib_device
*device
;
2194 struct sk_buff
*msg
;
2195 u32 index
, port
, qpn
, cntn
;
2198 ret
= nlmsg_parse(nlh
, 0, tb
, RDMA_NLDEV_ATTR_MAX
- 1,
2199 nldev_policy
, extack
);
2200 if (ret
|| !tb
[RDMA_NLDEV_ATTR_STAT_RES
] ||
2201 !tb
[RDMA_NLDEV_ATTR_DEV_INDEX
] || !tb
[RDMA_NLDEV_ATTR_PORT_INDEX
] ||
2202 !tb
[RDMA_NLDEV_ATTR_STAT_COUNTER_ID
] ||
2203 !tb
[RDMA_NLDEV_ATTR_RES_LQPN
])
2206 if (nla_get_u32(tb
[RDMA_NLDEV_ATTR_STAT_RES
]) != RDMA_NLDEV_ATTR_RES_QP
)
2209 index
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_DEV_INDEX
]);
2210 device
= ib_device_get_by_index(sock_net(skb
->sk
), index
);
2214 port
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_PORT_INDEX
]);
2215 if (!rdma_is_port_valid(device
, port
)) {
2220 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
2225 nlh
= nlmsg_put(msg
, NETLINK_CB(skb
).portid
, nlh
->nlmsg_seq
,
2226 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV
,
2227 RDMA_NLDEV_CMD_STAT_SET
),
2234 cntn
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_STAT_COUNTER_ID
]);
2235 qpn
= nla_get_u32(tb
[RDMA_NLDEV_ATTR_RES_LQPN
]);
2236 if (fill_nldev_handle(msg
, device
) ||
2237 nla_put_u32(msg
, RDMA_NLDEV_ATTR_PORT_INDEX
, port
) ||
2238 nla_put_u32(msg
, RDMA_NLDEV_ATTR_STAT_COUNTER_ID
, cntn
) ||
2239 nla_put_u32(msg
, RDMA_NLDEV_ATTR_RES_LQPN
, qpn
)) {
2244 ret
= rdma_counter_unbind_qpn(device
, port
, qpn
, cntn
);
2248 nlmsg_end(msg
, nlh
);
2249 ib_device_put(device
);
2250 return rdma_nl_unicast(sock_net(skb
->sk
), msg
, NETLINK_CB(skb
).portid
);
2255 ib_device_put(device
);
static int stat_get_doit_default_counter(struct sk_buff *skb,
					 struct nlmsghdr *nlh,
					 struct netlink_ext_ack *extack,
					 struct nlattr *tb[])
{
	struct rdma_hw_stats *stats;
	struct nlattr *table_attr;
	struct ib_device *device;
	int ret, num_cnts, i;
	struct sk_buff *msg;
	u32 index, port;
	u64 v;

	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
		ret = -EINVAL;
		goto err;
	}

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	stats = ib_get_hw_stats_port(device, port);
	if (!stats) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);

	if (!nlh || fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	mutex_lock(&stats->lock);

	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
	if (num_cnts < 0) {
		ret = -EINVAL;
		goto err_stats;
	}

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err_stats;
	}
	for (i = 0; i < num_cnts; i++) {
		if (test_bit(i, stats->is_disabled))
			continue;

		v = stats->value[i] +
			rdma_counter_get_hwstat_value(device, port, i);
		if (rdma_nl_stat_hwcounter_entry(msg,
						 stats->descs[i].name, v)) {
			ret = -EMSGSIZE;
			goto err_table;
		}
	}
	nla_nest_end(msg, table_attr);

	mutex_unlock(&stats->lock);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_table:
	nla_nest_cancel(msg, table_attr);
err_stats:
	mutex_unlock(&stats->lock);
err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
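
/*
 * QP statistic query: if a specific counter id was supplied, defer to the
 * res-counter handler; otherwise report the counter mode configured on the
 * port (and the auto-mode mask when automatic binding is active).
 */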
static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack, struct nlattr *tb[])
{
	static enum rdma_nl_counter_mode mode;
	static enum rdma_nl_counter_mask mask;
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index, port;
	int ret;

	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
		return nldev_res_get_counter_doit(skb, nlh, extack);

	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);
	if (!nlh) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	ret = rdma_counter_get_mode(device, port, &mode, &mask);
	if (ret)
		goto err_msg;

	if (fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
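
/*
 * RDMA_NLDEV_CMD_STAT_GET (doit): without a STAT_RES attribute report the
 * port default counters, otherwise dispatch by resource type (QP or MR).
 */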
static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	int ret;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (ret)
		return -EINVAL;

	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
		return stat_get_doit_default_counter(skb, nlh, extack, tb);

	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
	case RDMA_NLDEV_ATTR_RES_QP:
		ret = stat_get_doit_qp(skb, nlh, extack, tb);
		break;
	case RDMA_NLDEV_ATTR_RES_MR:
		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
					  fill_stat_mr_entry);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
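
/* RDMA_NLDEV_CMD_STAT_GET (dumpit): dump all QP counters or MR statistics. */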
static int nldev_stat_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	int ret;

	ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, NULL);
	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
		return -EINVAL;

	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
	case RDMA_NLDEV_ATTR_RES_QP:
		ret = nldev_res_get_counter_dumpit(skb, cb);
		break;
	case RDMA_NLDEV_ATTR_RES_MR:
		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
					    fill_stat_mr_entry);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
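
/*
 * RDMA_NLDEV_CMD_STAT_GET_STATUS handler: list every hardware counter of a
 * port by name and index; optional counters also carry a DYNAMIC flag that
 * reflects whether they are currently enabled.
 */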
static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
					      struct nlmsghdr *nlh,
					      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
	struct rdma_hw_stats *stats;
	struct ib_device *device;
	struct sk_buff *msg;
	u32 devid, port;
	int ret, i;

	ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			    nldev_policy, NL_VALIDATE_LIBERAL, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), devid);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	stats = ib_get_hw_stats_port(device, port);
	if (!stats) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	ret = -EMSGSIZE;
	nlh = nlmsg_put(
		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
		0, 0);

	if (!nlh || fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		goto err_msg;

	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table)
		goto err_msg;

	mutex_lock(&stats->lock);
	for (i = 0; i < stats->num_counters; i++) {
		entry = nla_nest_start(msg,
				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
		if (!entry)
			goto err_msg_table;

		if (nla_put_string(msg,
				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
				   stats->descs[i].name) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
			goto err_msg_entry;

		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
				!test_bit(i, stats->is_disabled))))
			goto err_msg_entry;

		nla_nest_end(msg, entry);
	}
	mutex_unlock(&stats->lock);

	nla_nest_end(msg, table);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_msg_entry:
	nla_nest_cancel(msg, entry);
err_msg_table:
	mutex_unlock(&stats->lock);
	nla_nest_cancel(msg, table);
err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}
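
/*
 * RDMA_NLDEV_CMD_NEWDEV handler: create a sub device of the requested type
 * and name on top of the parent device identified by DEV_INDEX.
 */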
static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	enum rdma_nl_dev_type type;
	struct ib_device *parent;
	char name[IFNAMSIZ] = {};
	u32 parentid;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
		return -EINVAL;

	nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
	type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
	parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
	if (!parent)
		return -EINVAL;

	ret = ib_add_sub_device(parent, type, name);
	ib_device_put(parent);

	return ret;
}
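
/* RDMA_NLDEV_CMD_DELDEV handler: remove a previously created sub device. */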
static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 devid;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), devid);
	if (!device)
		return -EINVAL;

	return ib_del_sub_device_and_put(device);
}
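
/*
 * Dispatch table for the RDMA_NL_NLDEV netlink client.  Each entry maps an
 * RDMA_NLDEV_CMD_* opcode to its .doit (single-shot request) and/or .dump
 * (multi-part dump) handler; RDMA_NL_ADMIN_PERM restricts the operation to
 * callers with CAP_NET_ADMIN.
 *
 * For orientation only (assuming iproute2's "rdma" utility, which speaks
 * this interface), the mapping is roughly:
 *
 *	rdma dev show          -> RDMA_NLDEV_CMD_GET        (.dump)
 *	rdma resource show qp  -> RDMA_NLDEV_CMD_RES_QP_GET (.dump)
 *	rdma statistic show    -> RDMA_NLDEV_CMD_STAT_GET   (.dump)
 *	rdma statistic set ... -> RDMA_NLDEV_CMD_STAT_SET   (.doit, admin only)
 */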
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
		.doit = nldev_get_chardev,
	},
	[RDMA_NLDEV_CMD_SET] = {
		.doit = nldev_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_NEWLINK] = {
		.doit = nldev_newlink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELLINK] = {
		.doit = nldev_dellink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.doit = nldev_res_get_qp_doit,
		.dump = nldev_res_get_qp_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.doit = nldev_res_get_cm_id_doit,
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.doit = nldev_res_get_cq_doit,
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.doit = nldev_res_get_mr_doit,
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.doit = nldev_res_get_pd_doit,
		.dump = nldev_res_get_pd_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
		.doit = nldev_res_get_ctx_doit,
		.dump = nldev_res_get_ctx_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
		.doit = nldev_res_get_srq_doit,
		.dump = nldev_res_get_srq_dumpit,
	},
	[RDMA_NLDEV_CMD_SYS_GET] = {
		.doit = nldev_sys_get_doit,
	},
	[RDMA_NLDEV_CMD_SYS_SET] = {
		.doit = nldev_set_sys_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_SET] = {
		.doit = nldev_stat_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET] = {
		.doit = nldev_stat_get_doit,
		.dump = nldev_stat_get_dumpit,
	},
	[RDMA_NLDEV_CMD_STAT_DEL] = {
		.doit = nldev_stat_del_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
		.doit = nldev_res_get_qp_raw_doit,
		.dump = nldev_res_get_qp_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
		.doit = nldev_res_get_cq_raw_doit,
		.dump = nldev_res_get_cq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
		.doit = nldev_res_get_mr_raw_doit,
		.dump = nldev_res_get_mr_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
		.doit = nldev_res_get_srq_raw_doit,
		.dump = nldev_res_get_srq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
		.doit = nldev_stat_get_counter_status_doit,
	},
	[RDMA_NLDEV_CMD_NEWDEV] = {
		.doit = nldev_newdev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELDEV] = {
		.doit = nldev_deldev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};
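
/*
 * Add the new netdev index/name to a monitor event; skipped when the port
 * has no netdev or the netdev belongs to another network namespace.
 */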
static int fill_mon_netdev_rename(struct sk_buff *msg,
				  struct ib_device *device, u32 port,
				  const struct net *net)
{
	struct net_device *netdev = ib_device_get_netdev(device, port);
	int ret = 0;

	if (!netdev || !net_eq(dev_net(netdev), net))
		goto out;

	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
	if (ret)
		goto out;
	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
out:
	dev_put(netdev);
	return ret;
}
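
/*
 * Describe the device/port <-> netdev association for a monitor event:
 * always carries the IB device index, name and port, plus the netdev index
 * and name when one is attached and visible in this namespace.
 */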
static int fill_mon_netdev_association(struct sk_buff *msg,
				       struct ib_device *device, u32 port,
				       const struct net *net)
{
	struct net_device *netdev = ib_device_get_netdev(device, port);
	int ret = 0;

	if (netdev && !net_eq(dev_net(netdev), net))
		goto out;

	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
	if (ret)
		goto out;

	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
			     dev_name(&device->dev));
	if (ret)
		goto out;

	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
	if (ret)
		goto out;

	if (!netdev)
		goto out;

	ret = nla_put_u32(msg,
			  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
	if (ret)
		goto out;

	ret = nla_put_string(msg,
			     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);

out:
	dev_put(netdev);
	return ret;
}
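
/* Rate-limited warnings for monitor events that could not be sent. */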
static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
				   enum rdma_nl_notify_event_type type)
{
	struct net_device *netdev;

	switch (type) {
	case RDMA_REGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor register device event\n");
		break;
	case RDMA_UNREGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor unregister device event\n");
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
				     port_num, netdev->ifindex);
		dev_put(netdev);
		break;
	case RDMA_NETDEV_DETACH_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev detach event: port %d\n",
				     port_num);
		break;
	case RDMA_RENAME_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor rename device event\n");
		break;
	case RDMA_NETDEV_RENAME_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
				     port_num, netdev->ifindex);
		dev_put(netdev);
		break;
	default:
		break;
	}
}
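
/*
 * Build an RDMA_NLDEV_CMD_MONITOR message for the given event type and
 * multicast it to the RDMA_NL_GROUP_NOTIFY group.  Userspace monitors
 * (for example "rdma monitor" in iproute2, assuming a recent enough
 * version) subscribe to this group to track device register/unregister,
 * rename and netdev attach/detach events.
 */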
int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
			 enum rdma_nl_notify_event_type type)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct net *net;
	int ret = -EMSGSIZE;

	net = read_pnet(&device->coredev.rdma_net);

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;
	nlh = nlmsg_put(skb, 0, 0,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
			0, 0);
	if (!nlh)
		goto err_free_skb;

	switch (type) {
	case RDMA_REGISTER_EVENT:
	case RDMA_UNREGISTER_EVENT:
	case RDMA_RENAME_EVENT:
		ret = fill_nldev_handle(skb, device);
		if (ret)
			goto err_free_skb;
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
	case RDMA_NETDEV_DETACH_EVENT:
		ret = fill_mon_netdev_association(skb, device, port_num, net);
		if (ret)
			goto err_free_skb;
		break;
	case RDMA_NETDEV_RENAME_EVENT:
		ret = fill_mon_netdev_rename(skb, device, port_num, net);
		if (ret)
			goto err_free_skb;
		break;
	default:
		break;
	}

	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
	if (ret)
		goto err_free_skb;

	nlmsg_end(skb, nlh);
	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
	if (ret && ret != -ESRCH) {
		skb = NULL; /* skb is freed in the netlink send-op handling */
		goto err_free_skb;
	}
	return 0;

err_free_skb:
	rdma_nl_notify_err_msg(device, port_num, type);
	nlmsg_free(skb);
	return ret;
}
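
/*
 * Register the nldev command table with the RDMA netlink core on init and
 * remove it on exit; the module alias lets the client be auto-loaded on
 * demand (index 5 is the RDMA_NL_NLDEV family index).
 */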
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}

void nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);