1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
3 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
5 #include <rdma/ib_verbs.h>
6 #include <rdma/rdma_counter.h>
/*
 * Mask bits accepted when a port is switched to auto mode; as defined here
 * this is RDMA_COUNTER_MASK_QP_TYPE only. __counter_set_mode() tests a new
 * mask against the complement of this value.
 */
11 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE)
/*
 * Store @new_mode and @new_mask into @curr.
 *
 * A request for RDMA_COUNTER_MODE_AUTO is checked first: the condition is
 * true when @new_mask carries bits outside ALL_AUTO_MODE_MASKS or when
 * @curr is not currently RDMA_COUNTER_MODE_NONE.
 *
 * NOTE(review): the statement guarded by that check and the return
 * statements are not visible here; presumably the request is rejected with
 * an error code - confirm against the full source.
 */
13 static int __counter_set_mode(struct rdma_counter_mode
*curr
,
14 enum rdma_nl_counter_mode new_mode
,
15 enum rdma_nl_counter_mask new_mask
)
17 if ((new_mode
== RDMA_COUNTER_MODE_AUTO
) &&
18 ((new_mask
& (~ALL_AUTO_MODE_MASKS
)) ||
19 (curr
->mode
!= RDMA_COUNTER_MODE_NONE
)))
22 curr
->mode
= new_mode
;
23 curr
->mask
= new_mask
;
28 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
30 * When @on is true, the @mask must be set; when @on is false, it goes
31 * into manual mode if there's any counter, so that the user is able to
32 * manually access them.
/*
 * Change the counter mode of @port on @dev. All mode changes are made
 * through __counter_set_mode() with the port counter's lock held.
 *
 * Visible requests: RDMA_COUNTER_MODE_AUTO with @mask, and
 * RDMA_COUNTER_MODE_MANUAL / RDMA_COUNTER_MODE_NONE with a zero mask. The
 * MANUAL request sits under a test of port_counter->num_counters.
 *
 * NOTE(review): the branch on @on, the action taken when hstats is NULL
 * and the returned value are not visible here - confirm against the full
 * source.
 */
34 int rdma_counter_set_auto_mode(struct ib_device
*dev
, u8 port
,
35 bool on
, enum rdma_nl_counter_mask mask
)
37 struct rdma_port_counter
*port_counter
;
40 port_counter
= &dev
->port_data
[port
].port_counter
;
41 if (!port_counter
->hstats
)
44 mutex_lock(&port_counter
->lock
);
46 ret
= __counter_set_mode(&port_counter
->mode
,
47 RDMA_COUNTER_MODE_AUTO
, mask
);
/* Reached only when the port is not already in auto mode. */
49 if (port_counter
->mode
.mode
!= RDMA_COUNTER_MODE_AUTO
) {
/* Counters still exist on the port: request manual mode. */
54 if (port_counter
->num_counters
)
55 ret
= __counter_set_mode(&port_counter
->mode
,
56 RDMA_COUNTER_MODE_MANUAL
, 0);
58 ret
= __counter_set_mode(&port_counter
->mode
,
59 RDMA_COUNTER_MODE_NONE
, 0);
63 mutex_unlock(&port_counter
->lock
);
/*
 * Allocate and initialise a counter for @port on @dev in @mode.
 *
 * The driver ops counter_dealloc and counter_alloc_stats are both tested
 * up front. The counter is zero-allocated with kzalloc(), tagged as an
 * RDMA_RESTRACK_COUNTER resource, and its stats come from the driver's
 * counter_alloc_stats op. With the port counter lock held, a manual-mode
 * request first asks the port to enter RDMA_COUNTER_MODE_MANUAL, then
 * num_counters is incremented. Finally the counter's mode, kref and lock
 * are initialised.
 *
 * NOTE(review): allocation-failure checks, labels and return statements
 * are not visible here; the trailing unlock + kfree(counter->stats) looks
 * like the error unwind - confirm.
 */
67 static struct rdma_counter
*rdma_counter_alloc(struct ib_device
*dev
, u8 port
,
68 enum rdma_nl_counter_mode mode
)
70 struct rdma_port_counter
*port_counter
;
71 struct rdma_counter
*counter
;
74 if (!dev
->ops
.counter_dealloc
|| !dev
->ops
.counter_alloc_stats
)
77 counter
= kzalloc(sizeof(*counter
), GFP_KERNEL
);
81 counter
->device
= dev
;
83 counter
->res
.type
= RDMA_RESTRACK_COUNTER
;
84 counter
->stats
= dev
->ops
.counter_alloc_stats(counter
);
88 port_counter
= &dev
->port_data
[port
].port_counter
;
89 mutex_lock(&port_counter
->lock
);
90 if (mode
== RDMA_COUNTER_MODE_MANUAL
) {
91 ret
= __counter_set_mode(&port_counter
->mode
,
92 RDMA_COUNTER_MODE_MANUAL
, 0);
97 port_counter
->num_counters
++;
98 mutex_unlock(&port_counter
->lock
);
100 counter
->mode
.mode
= mode
;
101 kref_init(&counter
->kref
);
102 mutex_init(&counter
->lock
);
107 mutex_unlock(&port_counter
->lock
);
108 kfree(counter
->stats
);
/*
 * Remove @counter from its port's accounting and release its resources.
 *
 * With the port counter lock held, num_counters is decremented; when it
 * reaches zero while the port is in RDMA_COUNTER_MODE_MANUAL the port mode
 * is reset to RDMA_COUNTER_MODE_NONE. Afterwards the restrack entry is
 * deleted and counter->stats is freed.
 *
 * NOTE(review): the last argument of the __counter_set_mode() call and the
 * release of @counter itself are not visible here.
 */
114 static void rdma_counter_free(struct rdma_counter
*counter
)
116 struct rdma_port_counter
*port_counter
;
118 port_counter
= &counter
->device
->port_data
[counter
->port
].port_counter
;
119 mutex_lock(&port_counter
->lock
);
120 port_counter
->num_counters
--;
121 if (!port_counter
->num_counters
&&
122 (port_counter
->mode
.mode
== RDMA_COUNTER_MODE_MANUAL
))
123 __counter_set_mode(&port_counter
->mode
, RDMA_COUNTER_MODE_NONE
,
126 mutex_unlock(&port_counter
->lock
);
128 rdma_restrack_del(&counter
->res
);
129 kfree(counter
->stats
);
/*
 * Mark @counter as an auto-mode counter governed by @new_mask. When the
 * mask includes RDMA_COUNTER_MASK_QP_TYPE, @qp's qp_type is recorded in the
 * counter's auto-mode parameters so later QPs can be compared against it.
 */
133 static void auto_mode_init_counter(struct rdma_counter
*counter
,
134 const struct ib_qp
*qp
,
135 enum rdma_nl_counter_mask new_mask
)
137 struct auto_mode_param
*param
= &counter
->mode
.param
;
139 counter
->mode
.mode
= RDMA_COUNTER_MODE_AUTO
;
140 counter
->mode
.mask
= new_mask
;
142 if (new_mask
& RDMA_COUNTER_MASK_QP_TYPE
)
143 param
->qp_type
= qp
->qp_type
;
/*
 * Decide whether @qp may share @counter under @auto_mask.
 *
 * The PIDs of the tasks owning the counter and the QP are compared, and the
 * QP's PID is also tested for being zero. When RDMA_COUNTER_MASK_QP_TYPE is
 * set in @auto_mask, a qp_type comparison is folded into match.
 *
 * NOTE(review): the declaration and initial value of match, the statement
 * guarded by the PID test and the return statement are not visible here.
 */
146 static bool auto_mode_match(struct ib_qp
*qp
, struct rdma_counter
*counter
,
147 enum rdma_nl_counter_mask auto_mask
)
149 struct auto_mode_param
*param
= &counter
->mode
.param
;
153 * Ensure that counter belongs to the right PID. This operation can
154 * race with user space which kills the process and leaves QP and
157 * It is not a big deal because exited task will leave both QP and
158 * counter in the same bucket of zombie process. Just ensure that
159 * process is still alive before proceeding.
162 if (task_pid_nr(counter
->res
.task
) != task_pid_nr(qp
->res
.task
) ||
163 !task_pid_nr(qp
->res
.task
))
166 if (auto_mask
& RDMA_COUNTER_MASK_QP_TYPE
)
167 match
&= (param
->qp_type
== qp
->qp_type
);
/*
 * Bind a QP to @counter through the driver's counter_bind_qp op. The op is
 * invoked with counter->lock held and its result is stored in ret.
 *
 * NOTE(review): the remaining parameter list, the handling of a missing
 * op and the return statement are not visible here.
 */
172 static int __rdma_counter_bind_qp(struct rdma_counter
*counter
,
180 if (!qp
->device
->ops
.counter_bind_qp
)
183 mutex_lock(&counter
->lock
);
184 ret
= qp
->device
->ops
.counter_bind_qp(counter
, qp
);
185 mutex_unlock(&counter
->lock
);
/*
 * Unbind @qp from the counter it currently references (qp->counter) through
 * the driver's counter_unbind_qp op, invoked with that counter's lock held.
 *
 * NOTE(review): the handling of a missing op and the return statement are
 * not visible here.
 */
190 static int __rdma_counter_unbind_qp(struct ib_qp
*qp
)
192 struct rdma_counter
*counter
= qp
->counter
;
195 if (!qp
->device
->ops
.counter_unbind_qp
)
198 mutex_lock(&counter
->lock
);
199 ret
= qp
->device
->ops
.counter_unbind_qp(qp
);
200 mutex_unlock(&counter
->lock
);
/*
 * Fold @counter's current statistics into the per-port history: every
 * entry of counter->stats->value[] (num_counters of them) is added to the
 * entry with the same index in port_counter->hstats->value[].
 *
 * NOTE(review): hstats is tested for NULL first; the guarded statement is
 * not visible here (presumably an early return).
 */
205 static void counter_history_stat_update(const struct rdma_counter
*counter
)
207 struct ib_device
*dev
= counter
->device
;
208 struct rdma_port_counter
*port_counter
;
211 port_counter
= &dev
->port_data
[counter
->port
].port_counter
;
212 if (!port_counter
->hstats
)
215 for (i
= 0; i
< counter
->stats
->num_counters
; i
++)
216 port_counter
->hstats
->value
[i
] += counter
->stats
->value
[i
];
220 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
223 * Return: The counter (with ref-count increased) if found
/*
 * Search the device's RDMA_RESTRACK_COUNTER xarray for a counter that @qp
 * can be bound to in auto mode. Each entry is compared against the QP's
 * device and the requested port, then offered to auto_mode_match() with the
 * port's current mask. A reference on the result is attempted with
 * kref_get_unless_zero().
 *
 * NOTE(review): the remaining parameter list, the statements guarded by
 * the per-entry tests, any locking around the walk and the return statement
 * are not visible here.
 */
225 static struct rdma_counter
*rdma_get_counter_auto_mode(struct ib_qp
*qp
,
228 struct rdma_port_counter
*port_counter
;
229 struct rdma_counter
*counter
= NULL
;
230 struct ib_device
*dev
= qp
->device
;
231 struct rdma_restrack_entry
*res
;
232 struct rdma_restrack_root
*rt
;
233 unsigned long id
= 0;
235 port_counter
= &dev
->port_data
[port
].port_counter
;
236 rt
= &dev
->res
[RDMA_RESTRACK_COUNTER
];
238 xa_for_each(&rt
->xa
, id
, res
) {
239 counter
= container_of(res
, struct rdma_counter
, res
);
240 if ((counter
->device
!= qp
->device
) || (counter
->port
!= port
))
243 if (auto_mode_match(qp
, counter
, port_counter
->mode
.mask
))
/* A counter whose refcount already dropped to zero cannot be reused. */
249 if (counter
&& !kref_get_unless_zero(&counter
->kref
))
/*
 * Register @counter with resource tracking, taking ownership information
 * from the QP. For a kernel QP resource the QP's kern_name is copied and
 * the entry is added with rdma_restrack_kadd(); the other visible pair
 * attaches the QP's task and adds the entry with rdma_restrack_uadd().
 *
 * NOTE(review): the remaining parameter list and the else that presumably
 * separates the two pairs are not visible here.
 */
256 static void rdma_counter_res_add(struct rdma_counter
*counter
,
259 if (rdma_is_kernel_res(&qp
->res
)) {
260 rdma_restrack_set_task(&counter
->res
, qp
->res
.kern_name
);
261 rdma_restrack_kadd(&counter
->res
);
263 rdma_restrack_attach_task(&counter
->res
, qp
->res
.task
);
264 rdma_restrack_uadd(&counter
->res
);
/*
 * kref release callback for a counter (passed to kref_put() by the callers
 * in this file). Recovers the counter from @kref, folds its statistics into
 * the port history, lets the driver deallocate it via counter_dealloc, and
 * finally hands it to rdma_counter_free().
 */
268 static void counter_release(struct kref
*kref
)
270 struct rdma_counter
*counter
;
272 counter
= container_of(kref
, struct rdma_counter
, kref
);
273 counter_history_stat_update(counter
);
274 counter
->device
->ops
.counter_dealloc(counter
);
275 rdma_counter_free(counter
);
279 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
/*
 * Bind @qp to a counter on @port according to the port's auto-mode
 * configuration.
 *
 * @port is validated and the port mode is compared with
 * RDMA_COUNTER_MODE_AUTO. An existing matching counter is looked up with
 * rdma_get_counter_auto_mode() and bound to. Otherwise a new auto-mode
 * counter is allocated, initialised from the port mask, bound, and
 * registered with resource tracking.
 *
 * NOTE(review): the branch structure and return statements are not
 * visible here; the kref_put() and rdma_counter_free() calls presumably
 * undo a failed bind on the existing and the new counter respectively -
 * confirm.
 */
282 int rdma_counter_bind_qp_auto(struct ib_qp
*qp
, u8 port
)
284 struct rdma_port_counter
*port_counter
;
285 struct ib_device
*dev
= qp
->device
;
286 struct rdma_counter
*counter
;
292 if (!rdma_is_port_valid(dev
, port
))
295 port_counter
= &dev
->port_data
[port
].port_counter
;
296 if (port_counter
->mode
.mode
!= RDMA_COUNTER_MODE_AUTO
)
299 counter
= rdma_get_counter_auto_mode(qp
, port
);
301 ret
= __rdma_counter_bind_qp(counter
, qp
);
303 kref_put(&counter
->kref
, counter_release
);
307 counter
= rdma_counter_alloc(dev
, port
, RDMA_COUNTER_MODE_AUTO
);
311 auto_mode_init_counter(counter
, qp
, port_counter
->mode
.mask
);
313 ret
= __rdma_counter_bind_qp(counter
, qp
);
315 rdma_counter_free(counter
);
319 rdma_counter_res_add(counter
, qp
);
326 * rdma_counter_unbind_qp - Unbind a qp from a counter
328 * true - Decrease the counter ref-count anyway (e.g., qp destroy)
/*
 * Unbind @qp from the counter it references and drop one reference on that
 * counter; counter_release() runs when the last reference goes away.
 *
 * NOTE(review): how @force and the result of __rdma_counter_unbind_qp()
 * gate the kref_put(), and the handling of a QP without a counter, are not
 * visible here.
 */
330 int rdma_counter_unbind_qp(struct ib_qp
*qp
, bool force
)
332 struct rdma_counter
*counter
= qp
->counter
;
338 ret
= __rdma_counter_unbind_qp(qp
);
342 kref_put(&counter
->kref
, counter_release
);
/*
 * Refresh @counter's statistics by calling the driver's
 * counter_update_stats op with counter->lock held.
 *
 * NOTE(review): the handling of a missing op and the return statement are
 * not visible here.
 */
346 int rdma_counter_query_stats(struct rdma_counter
*counter
)
348 struct ib_device
*dev
= counter
->device
;
351 if (!dev
->ops
.counter_update_stats
)
354 mutex_lock(&counter
->lock
);
355 ret
= dev
->ops
.counter_update_stats(counter
);
356 mutex_unlock(&counter
->lock
);
/*
 * Sum stats->value[index] over the counters found in the device's
 * RDMA_RESTRACK_COUNTER xarray. Each entry is pinned with
 * rdma_restrack_get() and released with rdma_restrack_put(). Entries are
 * tested for belonging to @dev and the requested port, and their stats are
 * refreshed with rdma_counter_query_stats() as part of the same test.
 *
 * NOTE(review): the remaining parameter list, the statements guarded by
 * the per-entry tests (presumably skipping the entry), any locking around
 * the walk and the return statement are not visible here.
 */
361 static u64
get_running_counters_hwstat_sum(struct ib_device
*dev
,
364 struct rdma_restrack_entry
*res
;
365 struct rdma_restrack_root
*rt
;
366 struct rdma_counter
*counter
;
367 unsigned long id
= 0;
370 rt
= &dev
->res
[RDMA_RESTRACK_COUNTER
];
372 xa_for_each(&rt
->xa
, id
, res
) {
373 if (!rdma_restrack_get(res
))
378 counter
= container_of(res
, struct rdma_counter
, res
);
379 if ((counter
->device
!= dev
) || (counter
->port
!= port
) ||
380 rdma_counter_query_stats(counter
))
383 sum
+= counter
->stats
->value
[index
];
387 rdma_restrack_put(res
);
395 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
396 * specific port, including the running ones and history data
/*
 * Total value of hardware statistic @index on @port: the sum over the
 * currently running counters (get_running_counters_hwstat_sum()) plus the
 * history accumulated in port_counter->hstats->value[index].
 *
 * NOTE(review): the value produced when hstats is NULL and the return
 * statement are not visible here.
 */
398 u64
rdma_counter_get_hwstat_value(struct ib_device
*dev
, u8 port
, u32 index
)
400 struct rdma_port_counter
*port_counter
;
403 port_counter
= &dev
->port_data
[port
].port_counter
;
404 if (!port_counter
->hstats
)
407 sum
= get_running_counters_hwstat_sum(dev
, port
, index
);
408 sum
+= port_counter
->hstats
->value
[index
];
/*
 * Look up QP @qp_num on @dev through resource tracking
 * (rdma_restrack_get_byid()). A QP of type IB_QPT_RAW_PACKET is tested
 * against the caller holding CAP_NET_RAW.
 *
 * NOTE(review): error checks, labels and return statements are not visible
 * here; the trailing rdma_restrack_put() presumably drops the lookup
 * reference on the rejection path - confirm. Callers in this file release a
 * returned QP with rdma_restrack_put(&qp->res).
 */
413 static struct ib_qp
*rdma_counter_get_qp(struct ib_device
*dev
, u32 qp_num
)
415 struct rdma_restrack_entry
*res
= NULL
;
416 struct ib_qp
*qp
= NULL
;
418 res
= rdma_restrack_get_byid(dev
, RDMA_RESTRACK_QP
, qp_num
);
422 qp
= container_of(res
, struct ib_qp
, res
);
423 if (qp
->qp_type
== IB_QPT_RAW_PACKET
&& !capable(CAP_NET_RAW
))
429 rdma_restrack_put(res
);
/*
 * Manual-mode bind: the counter and the QP are compared for being on the
 * same device and port, then the bind is delegated to
 * __rdma_counter_bind_qp(), whose result is returned.
 *
 * NOTE(review): the remaining parameter list and the statement taken on a
 * device/port mismatch are not visible here.
 */
433 static int rdma_counter_bind_qp_manual(struct rdma_counter
*counter
,
436 if ((counter
->device
!= qp
->device
) || (counter
->port
!= qp
->port
))
439 return __rdma_counter_bind_qp(counter
, qp
);
/*
 * Look up a counter by id through resource tracking, take a kref on it and
 * drop the restrack reference obtained by the lookup. Callers in this file
 * release the counter with kref_put(..., counter_release).
 *
 * NOTE(review): the remaining parameter list, the lookup-failure check and
 * the return statement are not visible here.
 */
442 static struct rdma_counter
*rdma_get_counter_by_id(struct ib_device
*dev
,
445 struct rdma_restrack_entry
*res
;
446 struct rdma_counter
*counter
;
448 res
= rdma_restrack_get_byid(dev
, RDMA_RESTRACK_COUNTER
, counter_id
);
452 counter
= container_of(res
, struct rdma_counter
, res
);
453 kref_get(&counter
->kref
);
454 rdma_restrack_put(res
);
460 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
/*
 * Bind QP @qp_num to the existing counter @counter_id on @port.
 *
 * The port mode is compared with RDMA_COUNTER_MODE_AUTO, the QP and the
 * counter are looked up, their owning tasks are compared, and the bind is
 * performed with rdma_counter_bind_qp_manual(). The QP's restrack reference
 * and the counter's kref are dropped on the visible exit paths.
 *
 * NOTE(review): unlike rdma_counter_bind_qpn_alloc(), no
 * rdma_is_port_valid() check is visible before port_data[port] is indexed -
 * confirm that callers validate @port. Labels, error codes and return
 * statements are not visible here.
 */
462 int rdma_counter_bind_qpn(struct ib_device
*dev
, u8 port
,
463 u32 qp_num
, u32 counter_id
)
465 struct rdma_port_counter
*port_counter
;
466 struct rdma_counter
*counter
;
470 port_counter
= &dev
->port_data
[port
].port_counter
;
471 if (port_counter
->mode
.mode
== RDMA_COUNTER_MODE_AUTO
)
474 qp
= rdma_counter_get_qp(dev
, qp_num
);
478 counter
= rdma_get_counter_by_id(dev
, counter_id
);
484 if (counter
->res
.task
!= qp
->res
.task
) {
489 ret
= rdma_counter_bind_qp_manual(counter
, qp
);
493 rdma_restrack_put(&qp
->res
);
497 kref_put(&counter
->kref
, counter_release
);
499 rdma_restrack_put(&qp
->res
);
504 * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
505 * The id of new counter is returned in @counter_id
/*
 * Allocate a new manual-mode counter on @port, bind QP @qp_num to it and
 * report the new counter's id through @counter_id.
 *
 * @port is validated, hstats is tested, and the port mode is compared with
 * RDMA_COUNTER_MODE_AUTO. The QP is looked up and its own port, when
 * valid, is compared with @port. On the visible success path the counter id
 * is stored, the counter is registered with resource tracking and the QP's
 * restrack reference is dropped.
 *
 * NOTE(review): labels, error codes and return statements are not visible
 * here; the trailing rdma_counter_free() + rdma_restrack_put() look like
 * the error unwind - confirm.
 */
507 int rdma_counter_bind_qpn_alloc(struct ib_device
*dev
, u8 port
,
508 u32 qp_num
, u32
*counter_id
)
510 struct rdma_port_counter
*port_counter
;
511 struct rdma_counter
*counter
;
515 if (!rdma_is_port_valid(dev
, port
))
518 port_counter
= &dev
->port_data
[port
].port_counter
;
519 if (!port_counter
->hstats
)
522 if (port_counter
->mode
.mode
== RDMA_COUNTER_MODE_AUTO
)
525 qp
= rdma_counter_get_qp(dev
, qp_num
);
529 if (rdma_is_port_valid(dev
, qp
->port
) && (qp
->port
!= port
)) {
534 counter
= rdma_counter_alloc(dev
, port
, RDMA_COUNTER_MODE_MANUAL
);
540 ret
= rdma_counter_bind_qp_manual(counter
, qp
);
545 *counter_id
= counter
->id
;
547 rdma_counter_res_add(counter
, qp
);
549 rdma_restrack_put(&qp
->res
);
553 rdma_counter_free(counter
);
555 rdma_restrack_put(&qp
->res
);
560 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
/*
 * Unbind QP @qp_num from counter @counter_id on @port.
 *
 * @port is validated, the QP is looked up and its own port, when valid, is
 * compared with @port. A further test covers three cases: the QP has no
 * counter, its counter's id differs from @counter_id, or the port is not
 * in RDMA_COUNTER_MODE_MANUAL. The unbind itself is
 * rdma_counter_unbind_qp(qp, false); the QP's restrack reference is
 * dropped afterwards.
 *
 * NOTE(review): the statements guarded by those tests (presumably error
 * exits) and the return statement are not visible here.
 */
562 int rdma_counter_unbind_qpn(struct ib_device
*dev
, u8 port
,
563 u32 qp_num
, u32 counter_id
)
565 struct rdma_port_counter
*port_counter
;
569 if (!rdma_is_port_valid(dev
, port
))
572 qp
= rdma_counter_get_qp(dev
, qp_num
);
576 if (rdma_is_port_valid(dev
, qp
->port
) && (qp
->port
!= port
)) {
581 port_counter
= &dev
->port_data
[port
].port_counter
;
582 if (!qp
->counter
|| qp
->counter
->id
!= counter_id
||
583 port_counter
->mode
.mode
!= RDMA_COUNTER_MODE_MANUAL
) {
588 ret
= rdma_counter_unbind_qp(qp
, false);
591 rdma_restrack_put(&qp
->res
);
/*
 * Report the current counter mode and mask of @port on @dev through @mode
 * and @mask. The values are read from the port counter without taking its
 * lock in the visible lines.
 *
 * NOTE(review): the return statement is not visible here.
 */
595 int rdma_counter_get_mode(struct ib_device
*dev
, u8 port
,
596 enum rdma_nl_counter_mode
*mode
,
597 enum rdma_nl_counter_mask
*mask
)
599 struct rdma_port_counter
*port_counter
;
601 port_counter
= &dev
->port_data
[port
].port_counter
;
602 *mode
= port_counter
->mode
.mode
;
603 *mask
= port_counter
->mode
.mask
;
/*
 * Initialise the per-port counter state of @dev.
 *
 * For every port the mode is set to RDMA_COUNTER_MODE_NONE and the port
 * counter lock is initialised. The driver's alloc_hw_stats op is tested,
 * and the history statistics (hstats) are obtained from it.
 *
 * If obtaining hstats fails, the ports handled so far are unwound from the
 * failing port down to rdma_start_port(): hstats is freed and cleared and
 * the lock is destroyed.
 *
 * NOTE(review): local declarations, the statements guarded by the two
 * tests in the first loop and the label in front of the unwind loop are not
 * visible here.
 */
608 void rdma_counter_init(struct ib_device
*dev
)
610 struct rdma_port_counter
*port_counter
;
616 rdma_for_each_port(dev
, port
) {
617 port_counter
= &dev
->port_data
[port
].port_counter
;
618 port_counter
->mode
.mode
= RDMA_COUNTER_MODE_NONE
;
619 mutex_init(&port_counter
->lock
);
621 if (!dev
->ops
.alloc_hw_stats
)
624 port_counter
->hstats
= dev
->ops
.alloc_hw_stats(dev
, port
);
625 if (!port_counter
->hstats
)
632 for (i
= port
; i
>= rdma_start_port(dev
); i
--) {
/*
 * Index with the loop variable: using the failing port number here would
 * release that one port over and over while leaking hstats and leaving the
 * lock initialised on every earlier port.
 */
633 port_counter
= &dev
->port_data
[i
].port_counter
;
/* kfree(NULL) is a no-op, so the failing port itself is safe to pass. */
634 kfree(port_counter
->hstats
);
635 port_counter
->hstats
= NULL
;
636 mutex_destroy(&port_counter
->lock
);
/*
 * Tear down the per-port counter state of @dev: for every port the history
 * statistics (hstats) are freed and the port counter lock is destroyed.
 *
 * NOTE(review): local declarations and any early-exit test ahead of the
 * loop are not visible here.
 */
640 void rdma_counter_release(struct ib_device
*dev
)
642 struct rdma_port_counter
*port_counter
;
645 rdma_for_each_port(dev
, port
) {
646 port_counter
= &dev
->port_data
[port
].port_counter
;
647 kfree(port_counter
->hstats
);
648 mutex_destroy(&port_counter
->lock
);