Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / drivers / net / ethernet / sfc / tc_counters.c
bloba421b01235069453ee59034b9301ec6838ded933
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2022 Advanced Micro Devices, Inc.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
11 #include "tc_counters.h"
12 #include "tc_encap_actions.h"
13 #include "mae_counter_format.h"
14 #include "mae.h"
15 #include "rx_common.h"
17 /* Counter-management hashtables */
19 static const struct rhashtable_params efx_tc_counter_id_ht_params = {
20 .key_len = offsetof(struct efx_tc_counter_index, linkage),
21 .key_offset = 0,
22 .head_offset = offsetof(struct efx_tc_counter_index, linkage),
25 static const struct rhashtable_params efx_tc_counter_ht_params = {
26 .key_len = offsetof(struct efx_tc_counter, linkage),
27 .key_offset = 0,
28 .head_offset = offsetof(struct efx_tc_counter, linkage),
31 static void efx_tc_counter_free(void *ptr, void *__unused)
33 struct efx_tc_counter *cnt = ptr;
35 WARN_ON(!list_empty(&cnt->users));
36 /* We'd like to synchronize_rcu() here, but unfortunately we aren't
37 * removing the element from the hashtable (it's not clear that's a
38 * safe thing to do in an rhashtable_free_and_destroy free_fn), so
39 * threads could still be obtaining new pointers to *cnt if they can
40 * race against this function at all.
42 flush_work(&cnt->work);
43 EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
44 kfree(cnt);
47 static void efx_tc_counter_id_free(void *ptr, void *__unused)
49 struct efx_tc_counter_index *ctr = ptr;
51 WARN_ON(refcount_read(&ctr->ref));
52 kfree(ctr);
55 int efx_tc_init_counters(struct efx_nic *efx)
57 int rc;
59 rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params);
60 if (rc < 0)
61 goto fail_counter_id_ht;
62 rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params);
63 if (rc < 0)
64 goto fail_counter_ht;
65 return 0;
66 fail_counter_ht:
67 rhashtable_destroy(&efx->tc->counter_id_ht);
68 fail_counter_id_ht:
69 return rc;
72 /* Only call this in init failure teardown.
73 * Normal exit should fini instead as there may be entries in the table.
75 void efx_tc_destroy_counters(struct efx_nic *efx)
77 rhashtable_destroy(&efx->tc->counter_ht);
78 rhashtable_destroy(&efx->tc->counter_id_ht);
81 void efx_tc_fini_counters(struct efx_nic *efx)
83 rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL);
84 rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL);
87 static void efx_tc_counter_work(struct work_struct *work)
89 struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work);
90 struct efx_tc_encap_action *encap;
91 struct efx_tc_action_set *act;
92 unsigned long touched;
93 struct neighbour *n;
95 spin_lock_bh(&cnt->lock);
96 touched = READ_ONCE(cnt->touched);
98 list_for_each_entry(act, &cnt->users, count_user) {
99 encap = act->encap_md;
100 if (!encap)
101 continue;
102 if (!encap->neigh) /* can't happen */
103 continue;
104 if (time_after_eq(encap->neigh->used, touched))
105 continue;
106 encap->neigh->used = touched;
107 /* We have passed traffic using this ARP entry, so
108 * indicate to the ARP cache that it's still active
110 if (encap->neigh->dst_ip)
111 n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip,
112 encap->neigh->egdev);
113 else
114 #if IS_ENABLED(CONFIG_IPV6)
115 n = neigh_lookup(ipv6_stub->nd_tbl,
116 &encap->neigh->dst_ip6,
117 encap->neigh->egdev);
118 #else
119 n = NULL;
120 #endif
121 if (!n)
122 continue;
124 neigh_event_send(n, NULL);
125 neigh_release(n);
127 spin_unlock_bh(&cnt->lock);
130 /* Counter allocation */
132 struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
133 int type)
135 struct efx_tc_counter *cnt;
136 int rc, rc2;
138 cnt = kzalloc(sizeof(*cnt), GFP_USER);
139 if (!cnt)
140 return ERR_PTR(-ENOMEM);
142 spin_lock_init(&cnt->lock);
143 INIT_WORK(&cnt->work, efx_tc_counter_work);
144 cnt->touched = jiffies;
145 cnt->type = type;
147 rc = efx_mae_allocate_counter(efx, cnt);
148 if (rc)
149 goto fail1;
150 INIT_LIST_HEAD(&cnt->users);
151 rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage,
152 efx_tc_counter_ht_params);
153 if (rc)
154 goto fail2;
155 return cnt;
156 fail2:
157 /* If we get here, it implies that we couldn't insert into the table,
158 * which in turn probably means that the fw_id was already taken.
159 * In that case, it's unclear whether we really 'own' the fw_id; but
160 * the firmware seemed to think we did, so it's proper to free it.
162 rc2 = efx_mae_free_counter(efx, cnt);
163 if (rc2)
164 netif_warn(efx, hw, efx->net_dev,
165 "Failed to free MAE counter %u, rc %d\n",
166 cnt->fw_id, rc2);
167 fail1:
168 kfree(cnt);
169 return ERR_PTR(rc > 0 ? -EIO : rc);
172 void efx_tc_flower_release_counter(struct efx_nic *efx,
173 struct efx_tc_counter *cnt)
175 int rc;
177 rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage,
178 efx_tc_counter_ht_params);
179 rc = efx_mae_free_counter(efx, cnt);
180 if (rc)
181 netif_warn(efx, hw, efx->net_dev,
182 "Failed to free MAE counter %u, rc %d\n",
183 cnt->fw_id, rc);
184 WARN_ON(!list_empty(&cnt->users));
185 /* This doesn't protect counter updates coming in arbitrarily long
186 * after we deleted the counter. The RCU just ensures that we won't
187 * free the counter while another thread has a pointer to it.
188 * Ensuring we don't update the wrong counter if the ID gets re-used
189 * is handled by the generation count.
191 synchronize_rcu();
192 flush_work(&cnt->work);
193 EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
194 kfree(cnt);
197 static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id(
198 struct efx_nic *efx, int type, u32 fw_id)
200 struct efx_tc_counter key = {};
202 key.fw_id = fw_id;
203 key.type = type;
205 return rhashtable_lookup_fast(&efx->tc->counter_ht, &key,
206 efx_tc_counter_ht_params);
209 /* TC cookie to counter mapping */
211 void efx_tc_flower_put_counter_index(struct efx_nic *efx,
212 struct efx_tc_counter_index *ctr)
214 if (!refcount_dec_and_test(&ctr->ref))
215 return; /* still in use */
216 rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage,
217 efx_tc_counter_id_ht_params);
218 efx_tc_flower_release_counter(efx, ctr->cnt);
219 kfree(ctr);
222 struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
223 struct efx_nic *efx, unsigned long cookie,
224 enum efx_tc_counter_type type)
226 struct efx_tc_counter_index *ctr, *old;
227 struct efx_tc_counter *cnt;
229 ctr = kzalloc(sizeof(*ctr), GFP_USER);
230 if (!ctr)
231 return ERR_PTR(-ENOMEM);
232 ctr->cookie = cookie;
233 old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht,
234 &ctr->linkage,
235 efx_tc_counter_id_ht_params);
236 if (old) {
237 /* don't need our new entry */
238 kfree(ctr);
239 if (IS_ERR(old)) /* oh dear, it's actually an error */
240 return ERR_CAST(old);
241 if (!refcount_inc_not_zero(&old->ref))
242 return ERR_PTR(-EAGAIN);
243 /* existing entry found */
244 ctr = old;
245 } else {
246 cnt = efx_tc_flower_allocate_counter(efx, type);
247 if (IS_ERR(cnt)) {
248 rhashtable_remove_fast(&efx->tc->counter_id_ht,
249 &ctr->linkage,
250 efx_tc_counter_id_ht_params);
251 kfree(ctr);
252 return ERR_CAST(cnt);
254 ctr->cnt = cnt;
255 refcount_set(&ctr->ref, 1);
257 return ctr;
260 struct efx_tc_counter_index *efx_tc_flower_find_counter_index(
261 struct efx_nic *efx, unsigned long cookie)
263 struct efx_tc_counter_index key = {};
265 key.cookie = cookie;
266 return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key,
267 efx_tc_counter_id_ht_params);
270 /* TC Channel. Counter updates are delivered on this channel's RXQ. */
272 static void efx_tc_handle_no_channel(struct efx_nic *efx)
274 netif_warn(efx, drv, efx->net_dev,
275 "MAE counters require MSI-X and 1 additional interrupt vector.\n");
278 static int efx_tc_probe_channel(struct efx_channel *channel)
280 struct efx_rx_queue *rx_queue = &channel->rx_queue;
282 channel->irq_moderation_us = 0;
283 rx_queue->core_index = 0;
285 INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits);
287 return 0;
290 static int efx_tc_start_channel(struct efx_channel *channel)
292 struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
293 struct efx_nic *efx = channel->efx;
295 return efx_mae_start_counters(efx, rx_queue);
298 static void efx_tc_stop_channel(struct efx_channel *channel)
300 struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
301 struct efx_nic *efx = channel->efx;
302 int rc;
304 rc = efx_mae_stop_counters(efx, rx_queue);
305 if (rc)
306 netif_warn(efx, drv, efx->net_dev,
307 "Failed to stop MAE counters streaming, rc=%d.\n",
308 rc);
309 rx_queue->grant_credits = false;
310 flush_work(&rx_queue->grant_work);
/* .post_remove hook: intentionally does nothing. */
static void efx_tc_remove_channel(struct efx_channel *channel)
{
}
317 static void efx_tc_get_channel_name(struct efx_channel *channel,
318 char *buf, size_t len)
320 snprintf(buf, len, "%s-mae", channel->efx->name);
323 static void efx_tc_counter_update(struct efx_nic *efx,
324 enum efx_tc_counter_type counter_type,
325 u32 counter_idx, u64 packets, u64 bytes,
326 u32 mark)
328 struct efx_tc_counter *cnt;
330 rcu_read_lock(); /* Protect against deletion of 'cnt' */
331 cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx);
332 if (!cnt) {
333 /* This can legitimately happen when a counter is removed,
334 * with updates for the counter still in-flight; however this
335 * should be an infrequent occurrence.
337 if (net_ratelimit())
338 netif_dbg(efx, drv, efx->net_dev,
339 "Got update for unwanted MAE counter %u type %u\n",
340 counter_idx, counter_type);
341 goto out;
344 spin_lock_bh(&cnt->lock);
345 if ((s32)mark - (s32)cnt->gen < 0) {
346 /* This counter update packet is from before the counter was
347 * allocated; thus it must be for a previous counter with
348 * the same ID that has since been freed, and it should be
349 * ignored.
351 } else {
352 /* Update latest seen generation count. This ensures that
353 * even a long-lived counter won't start getting ignored if
354 * the generation count wraps around, unless it somehow
355 * manages to go 1<<31 generations without an update.
357 cnt->gen = mark;
358 /* update counter values */
359 cnt->packets += packets;
360 cnt->bytes += bytes;
361 cnt->touched = jiffies;
363 spin_unlock_bh(&cnt->lock);
364 schedule_work(&cnt->work);
365 out:
366 rcu_read_unlock();
369 static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark)
371 u16 n_counters, i;
373 /* Header format:
374 * + | 0 | 1 | 2 | 3 |
375 * 0 |version | reserved |
376 * 4 | seq_index | n_counters |
379 n_counters = le16_to_cpu(*(const __le16 *)(data + 6));
381 /* Counter update entry format:
382 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f |
383 * | counter_idx | packet_count | byte_count |
385 for (i = 0; i < n_counters; i++) {
386 const void *entry = data + 8 + 16 * i;
387 u64 packet_count, byte_count;
388 u32 counter_idx;
390 counter_idx = le32_to_cpu(*(const __le32 *)entry);
391 packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) |
392 ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32);
393 byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) |
394 ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16);
395 efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx,
396 packet_count, byte_count, mark);
/* Accessors for version-2 MAE counter packets.  Each one checks at build
 * time that the named field has the alignment/width the accessor assumes.
 */

/* Pointer to byte-aligned header field @field within packet @pkt */
#define TCV2_HDR_PTR(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8)
/* Value of 8-bit header field @field */
#define TCV2_HDR_BYTE(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\
	 *TCV2_HDR_PTR(pkt, field))
/* Value (still little-endian) of 16-bit, 16-bit-aligned header field @field */
#define TCV2_HDR_WORD(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\
	 (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15),	\
	 *(__force const __le16 *)TCV2_HDR_PTR(pkt, field))
/* Pointer to byte-aligned payload field @field of the @i'th payload word,
 * where the payload starts @poff bytes into @pkt.  All arguments are
 * parenthesised so that expressions may safely be passed for them.
 */
#define TCV2_PKT_PTR(pkt, poff, i, field)					\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN/8 + (poff) +		\
	 (i) * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
415 /* Read a little-endian 48-bit field with 16-bit alignment */
416 static u64 efx_tc_read48(const __le16 *field)
418 u64 out = 0;
419 int i;
421 for (i = 0; i < 3; i++)
422 out |= (u64)le16_to_cpu(field[i]) << (i * 16);
423 return out;
426 static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx,
427 const u8 *data, u32 mark)
429 u8 payload_offset, header_offset, ident;
430 enum efx_tc_counter_type type;
431 u16 n_counters, i;
433 ident = TCV2_HDR_BYTE(data, IDENTIFIER);
434 switch (ident) {
435 case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR:
436 type = EFX_TC_COUNTER_TYPE_AR;
437 break;
438 case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT:
439 type = EFX_TC_COUNTER_TYPE_CT;
440 break;
441 case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR:
442 type = EFX_TC_COUNTER_TYPE_OR;
443 break;
444 default:
445 if (net_ratelimit())
446 netif_err(efx, drv, efx->net_dev,
447 "ignored v2 MAE counter packet (bad identifier %u"
448 "), counters may be inaccurate\n", ident);
449 return EFX_TC_COUNTER_TYPE_MAX;
451 header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET);
452 /* mae_counter_format.h implies that this offset is fixed, since it
453 * carries on with SOP-based LBNs for the fields in this header
455 if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) {
456 if (net_ratelimit())
457 netif_err(efx, drv, efx->net_dev,
458 "choked on v2 MAE counter packet (bad header_offset %u"
459 "), counters may be inaccurate\n", header_offset);
460 return EFX_TC_COUNTER_TYPE_MAX;
462 payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET);
463 n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT));
465 for (i = 0; i < n_counters; i++) {
466 const void *counter_idx_p, *packet_count_p, *byte_count_p;
467 u64 packet_count, byte_count;
468 u32 counter_idx;
470 /* 24-bit field with 32-bit alignment */
471 counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX);
472 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24);
473 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31);
474 counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff;
475 /* 48-bit field with 16-bit alignment */
476 packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT);
477 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48);
478 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15);
479 packet_count = efx_tc_read48((const __le16 *)packet_count_p);
480 /* 48-bit field with 16-bit alignment */
481 byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT);
482 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48);
483 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15);
484 byte_count = efx_tc_read48((const __le16 *)byte_count_p);
486 if (type == EFX_TC_COUNTER_TYPE_CT) {
487 /* CT counters are 1-bit saturating counters to update
488 * the lastuse time in CT stats. A received CT counter
489 * should have packet counter to 0 and only LSB bit on
490 * in byte counter.
492 if (packet_count || byte_count != 1)
493 netdev_warn_once(efx->net_dev,
494 "CT counter with inconsistent state (%llu, %llu)\n",
495 packet_count, byte_count);
496 /* Do not increment the driver's byte counter */
497 byte_count = 0;
500 efx_tc_counter_update(efx, type, counter_idx, packet_count,
501 byte_count, mark);
503 return type;
506 /* We always swallow the packet, whether successful or not, since it's not
507 * a network packet and shouldn't ever be forwarded to the stack.
508 * @mark is the generation count for counter allocations.
510 static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark)
512 struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
513 struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue,
514 channel->rx_pkt_index);
515 const u8 *data = efx_rx_buf_va(rx_buf);
516 struct efx_nic *efx = rx_queue->efx;
517 enum efx_tc_counter_type type;
518 u8 version;
520 /* version is always first byte of packet */
521 version = *data;
522 switch (version) {
523 case 1:
524 type = EFX_TC_COUNTER_TYPE_AR;
525 efx_tc_rx_version_1(efx, data, mark);
526 break;
527 case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2
528 type = efx_tc_rx_version_2(efx, data, mark);
529 break;
530 default:
531 if (net_ratelimit())
532 netif_err(efx, drv, efx->net_dev,
533 "choked on MAE counter packet (bad version %u"
534 "); counters may be inaccurate\n",
535 version);
536 goto out;
539 if (type < EFX_TC_COUNTER_TYPE_MAX) {
540 /* Update seen_gen unconditionally, to avoid a missed wakeup if
541 * we race with efx_mae_stop_counters().
543 efx->tc->seen_gen[type] = mark;
544 if (efx->tc->flush_counters &&
545 (s32)(efx->tc->flush_gen[type] - mark) <= 0)
546 wake_up(&efx->tc->flush_wq);
548 out:
549 efx_free_rx_buffers(rx_queue, rx_buf, 1);
550 channel->rx_pkt_n_frags = 0;
551 return true;
554 const struct efx_channel_type efx_tc_channel_type = {
555 .handle_no_channel = efx_tc_handle_no_channel,
556 .pre_probe = efx_tc_probe_channel,
557 .start = efx_tc_start_channel,
558 .stop = efx_tc_stop_channel,
559 .post_remove = efx_tc_remove_channel,
560 .get_name = efx_tc_get_channel_name,
561 .receive_raw = efx_tc_rx,
562 .keep_eventq = true,