// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "tc_counters.h"
#include "tc_encap_actions.h"
#include "mae_counter_format.h"
#include "mae.h"
#include "rx_common.h"
/* Counter-management hashtables */

static const struct rhashtable_params efx_tc_counter_id_ht_params = {
	.key_len	= offsetof(struct efx_tc_counter_index, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_counter_index, linkage),
};

static const struct rhashtable_params efx_tc_counter_ht_params = {
	.key_len	= offsetof(struct efx_tc_counter, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_counter, linkage),
};
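
/* Note on the key layout (editorial, inferred from the lookup helpers below
 * rather than stated in the original): key_len of offsetof(..., linkage)
 * means the hash key is every field laid out before the rhash_head member.
 * For struct efx_tc_counter that is the fw_id and type populated in
 * efx_tc_flower_find_counter_by_fw_id(); for struct efx_tc_counter_index it
 * is the TC cookie populated in efx_tc_flower_find_counter_index().
 */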

static void efx_tc_counter_free(void *ptr, void *__unused)
{
	struct efx_tc_counter *cnt = ptr;

	WARN_ON(!list_empty(&cnt->users));
	/* We'd like to synchronize_rcu() here, but unfortunately we aren't
	 * removing the element from the hashtable (it's not clear that's a
	 * safe thing to do in an rhashtable_free_and_destroy free_fn), so
	 * threads could still be obtaining new pointers to *cnt if they can
	 * race against this function at all.
	 */
	flush_work(&cnt->work);
	EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
	kfree(cnt);
}

static void efx_tc_counter_id_free(void *ptr, void *__unused)
{
	struct efx_tc_counter_index *ctr = ptr;

	WARN_ON(refcount_read(&ctr->ref));
	kfree(ctr);
}

int efx_tc_init_counters(struct efx_nic *efx)
{
	int rc;

	rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params);
	if (rc < 0)
		goto fail_counter_id_ht;
	rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params);
	if (rc < 0)
		goto fail_counter_ht;
	return 0;
fail_counter_ht:
	rhashtable_destroy(&efx->tc->counter_id_ht);
fail_counter_id_ht:
	return rc;
}

/* Only call this in init failure teardown.
 * Normal exit should fini instead as there may be entries in the table.
 */
void efx_tc_destroy_counters(struct efx_nic *efx)
{
	rhashtable_destroy(&efx->tc->counter_ht);
	rhashtable_destroy(&efx->tc->counter_id_ht);
}

void efx_tc_fini_counters(struct efx_nic *efx)
{
	rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL);
	rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL);
}
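
/* Teardown-order note (an inference, not stated in the original source):
 * counter_id_ht is freed before counter_ht because each
 * efx_tc_counter_index holds a pointer (ctr->cnt) to a counter in
 * counter_ht, so the index entries should not outlive the counters they
 * reference.
 */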

static void efx_tc_counter_work(struct work_struct *work)
{
	struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work);
	struct efx_tc_encap_action *encap;
	struct efx_tc_action_set *act;
	unsigned long touched;
	struct neighbour *n;

	spin_lock_bh(&cnt->lock);
	touched = READ_ONCE(cnt->touched);

	list_for_each_entry(act, &cnt->users, count_user) {
		encap = act->encap_md;
		if (!encap)
			continue;
		if (!encap->neigh) /* can't happen */
			continue;
		if (time_after_eq(encap->neigh->used, touched))
			continue;
		encap->neigh->used = touched;
		/* We have passed traffic using this ARP entry, so
		 * indicate to the ARP cache that it's still active
		 */
		if (encap->neigh->dst_ip)
			n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip,
					 encap->neigh->egdev);
		else
#if IS_ENABLED(CONFIG_IPV6)
			n = neigh_lookup(ipv6_stub->nd_tbl,
					 &encap->neigh->dst_ip6,
					 encap->neigh->egdev);
#else
			n = NULL;
#endif
		if (!n)
			continue;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
	spin_unlock_bh(&cnt->lock);
}

/* Counter allocation */

struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
						      int type)
{
	struct efx_tc_counter *cnt;
	int rc, rc2;

	cnt = kzalloc(sizeof(*cnt), GFP_USER);
	if (!cnt)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&cnt->lock);
	INIT_WORK(&cnt->work, efx_tc_counter_work);
	cnt->touched = jiffies;
	cnt->type = type;

	rc = efx_mae_allocate_counter(efx, cnt);
	if (rc)
		goto fail1;
	INIT_LIST_HEAD(&cnt->users);
	rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage,
				    efx_tc_counter_ht_params);
	if (rc)
		goto fail2;
	return cnt;
fail2:
	/* If we get here, it implies that we couldn't insert into the table,
	 * which in turn probably means that the fw_id was already taken.
	 * In that case, it's unclear whether we really 'own' the fw_id; but
	 * the firmware seemed to think we did, so it's proper to free it.
	 */
	rc2 = efx_mae_free_counter(efx, cnt);
	if (rc2)
		netif_warn(efx, hw, efx->net_dev,
			   "Failed to free MAE counter %u, rc %d\n",
			   cnt->fw_id, rc2);
fail1:
	kfree(cnt);
	return ERR_PTR(rc > 0 ? -EIO : rc);
}

void efx_tc_flower_release_counter(struct efx_nic *efx,
				   struct efx_tc_counter *cnt)
{
	int rc;

	rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage,
			       efx_tc_counter_ht_params);
	rc = efx_mae_free_counter(efx, cnt);
	if (rc)
		netif_warn(efx, hw, efx->net_dev,
			   "Failed to free MAE counter %u, rc %d\n",
			   cnt->fw_id, rc);
	WARN_ON(!list_empty(&cnt->users));
	/* This doesn't protect counter updates coming in arbitrarily long
	 * after we deleted the counter. The RCU just ensures that we won't
	 * free the counter while another thread has a pointer to it.
	 * Ensuring we don't update the wrong counter if the ID gets re-used
	 * is handled by the generation count.
	 */
	synchronize_rcu();
	flush_work(&cnt->work);
	EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
	kfree(cnt);
}

static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id(
				struct efx_nic *efx, int type, u32 fw_id)
{
	struct efx_tc_counter key = {};

	key.fw_id = fw_id;
	key.type = type;

	return rhashtable_lookup_fast(&efx->tc->counter_ht, &key,
				      efx_tc_counter_ht_params);
}

/* TC cookie to counter mapping */

void efx_tc_flower_put_counter_index(struct efx_nic *efx,
				     struct efx_tc_counter_index *ctr)
{
	if (!refcount_dec_and_test(&ctr->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage,
			       efx_tc_counter_id_ht_params);
	efx_tc_flower_release_counter(efx, ctr->cnt);
	kfree(ctr);
}

struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
				struct efx_nic *efx, unsigned long cookie,
				enum efx_tc_counter_type type)
{
	struct efx_tc_counter_index *ctr, *old;
	struct efx_tc_counter *cnt;

	ctr = kzalloc(sizeof(*ctr), GFP_USER);
	if (!ctr)
		return ERR_PTR(-ENOMEM);
	ctr->cookie = cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht,
						&ctr->linkage,
						efx_tc_counter_id_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(ctr);
		if (IS_ERR(old)) /* oh dear, it's actually an error */
			return ERR_CAST(old);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found */
		ctr = old;
	} else {
		cnt = efx_tc_flower_allocate_counter(efx, type);
		if (IS_ERR(cnt)) {
			rhashtable_remove_fast(&efx->tc->counter_id_ht,
					       &ctr->linkage,
					       efx_tc_counter_id_ht_params);
			kfree(ctr);
			return ERR_CAST(cnt);
		}
		ctr->cnt = cnt;
		refcount_set(&ctr->ref, 1);
	}
	return ctr;
}

struct efx_tc_counter_index *efx_tc_flower_find_counter_index(
				struct efx_nic *efx, unsigned long cookie)
{
	struct efx_tc_counter_index key = {};

	key.cookie = cookie;

	return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key,
				      efx_tc_counter_id_ht_params);
}

/* TC Channel. Counter updates are delivered on this channel's RXQ. */

static void efx_tc_handle_no_channel(struct efx_nic *efx)
{
	netif_warn(efx, drv, efx->net_dev,
		   "MAE counters require MSI-X and 1 additional interrupt vector.\n");
}

static int efx_tc_probe_channel(struct efx_channel *channel)
{
	struct efx_rx_queue *rx_queue = &channel->rx_queue;

	channel->irq_moderation_us = 0;
	rx_queue->core_index = 0;

	INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits);

	return 0;
}

static int efx_tc_start_channel(struct efx_channel *channel)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
	struct efx_nic *efx = channel->efx;

	return efx_mae_start_counters(efx, rx_queue);
}

static void efx_tc_stop_channel(struct efx_channel *channel)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
	struct efx_nic *efx = channel->efx;
	int rc;

	rc = efx_mae_stop_counters(efx, rx_queue);
	if (rc)
		netif_warn(efx, drv, efx->net_dev,
			   "Failed to stop MAE counters streaming, rc=%d.\n",
			   rc);
	rx_queue->grant_credits = false;
	flush_work(&rx_queue->grant_work);
}

static void efx_tc_remove_channel(struct efx_channel *channel)
{
}

static void efx_tc_get_channel_name(struct efx_channel *channel,
				    char *buf, size_t len)
{
	snprintf(buf, len, "%s-mae", channel->efx->name);
}

static void efx_tc_counter_update(struct efx_nic *efx,
				  enum efx_tc_counter_type counter_type,
				  u32 counter_idx, u64 packets, u64 bytes,
				  u32 mark)
{
	struct efx_tc_counter *cnt;

	rcu_read_lock(); /* Protect against deletion of 'cnt' */
	cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx);
	if (!cnt) {
		/* This can legitimately happen when a counter is removed,
		 * with updates for the counter still in-flight; however this
		 * should be an infrequent occurrence.
		 */
		if (net_ratelimit())
			netif_dbg(efx, drv, efx->net_dev,
				  "Got update for unwanted MAE counter %u type %u\n",
				  counter_idx, counter_type);
		goto out;
	}

	spin_lock_bh(&cnt->lock);
	if ((s32)mark - (s32)cnt->gen < 0) {
		/* This counter update packet is from before the counter was
		 * allocated; thus it must be for a previous counter with
		 * the same ID that has since been freed, and it should be
		 * ignored.
		 */
	} else {
		/* Update latest seen generation count. This ensures that
		 * even a long-lived counter won't start getting ignored if
		 * the generation count wraps around, unless it somehow
		 * manages to go 1<<31 generations without an update.
		 */
		cnt->gen = mark;
		/* update counter values */
		cnt->packets += packets;
		cnt->bytes += bytes;
		cnt->touched = jiffies;
	}
	spin_unlock_bh(&cnt->lock);
	schedule_work(&cnt->work);
out:
	rcu_read_unlock();
}
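
/* Worked example of the generation check above (illustrative, not from the
 * original source): if cnt->gen is 0xfffffffe and the 32-bit generation
 * count then wraps so that an update arrives with mark == 0x00000001, the
 * signed difference (s32)0x00000001 - (s32)0xfffffffe == 3 is positive and
 * the update is accepted; a genuinely stale mark of 0xfffffffc gives -2 and
 * is discarded.
 */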

static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark)
{
	u16 n_counters, i;

	/* Header format:
	 * + |   0    |   1    |   2    |   3    |
	 * 0 |version |         reserved         |
	 * 4 |    seq_index    |   n_counters    |
	 */

	n_counters = le16_to_cpu(*(const __le16 *)(data + 6));

	/* Counter update entry format:
	 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f |
	 * |  counter_idx  |     packet_count      |      byte_count       |
	 */
	for (i = 0; i < n_counters; i++) {
		const void *entry = data + 8 + 16 * i;
		u64 packet_count, byte_count;
		u32 counter_idx;

		counter_idx = le32_to_cpu(*(const __le32 *)entry);
		packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) |
			       ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32);
		byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) |
			     ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16);
		efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx,
				      packet_count, byte_count, mark);
	}
}
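
/* Illustrative example of the split reads above (not from the original
 * source): a 48-bit packet_count stored little-endian as bytes
 * 11 22 33 44 55 66 at entry + 4 is read as le32 0x44332211 plus le16
 * 0x6655, combining to 0x44332211 | ((u64)0x6655 << 32) == 0x665544332211.
 */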

#define TCV2_HDR_PTR(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8)
#define TCV2_HDR_BYTE(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\
	 *TCV2_HDR_PTR(pkt, field))
#define TCV2_HDR_WORD(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\
	 (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15),	\
	 *(__force const __le16 *)TCV2_HDR_PTR(pkt, field))
#define TCV2_PKT_PTR(pkt, poff, i, field)					\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN/8 + poff +		\
	 i * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
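
/* Example expansion (illustrative): TCV2_HDR_BYTE(data, IDENTIFIER) reduces
 * to *(data + ERF_SC_PACKETISER_HEADER_IDENTIFIER_LBN / 8); the
 * BUILD_BUG_ON_ZERO() terms are compile-time checks that the field is
 * byte-aligned and exactly 8 bits wide, and generate no runtime code.
 */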

/* Read a little-endian 48-bit field with 16-bit alignment */
static u64 efx_tc_read48(const __le16 *field)
{
	u64 out = 0;
	int i;

	for (i = 0; i < 3; i++)
		out |= (u64)le16_to_cpu(field[i]) << (i * 16);
	return out;
}
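
/* Illustrative: with on-wire bytes 11 22 33 44 55 66, le16_to_cpu() yields
 * the halfwords 0x2211, 0x4433, 0x6655, so the loop returns
 * 0x2211 | (0x4433 << 16) | ((u64)0x6655 << 32) == 0x665544332211.
 */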

static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx,
						    const u8 *data, u32 mark)
{
	u8 payload_offset, header_offset, ident;
	enum efx_tc_counter_type type;
	u16 n_counters, i;

	ident = TCV2_HDR_BYTE(data, IDENTIFIER);
	switch (ident) {
	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR:
		type = EFX_TC_COUNTER_TYPE_AR;
		break;
	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT:
		type = EFX_TC_COUNTER_TYPE_CT;
		break;
	case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR:
		type = EFX_TC_COUNTER_TYPE_OR;
		break;
	default:
		if (net_ratelimit())
			netif_err(efx, drv, efx->net_dev,
				  "ignored v2 MAE counter packet (bad identifier %u"
				  "), counters may be inaccurate\n", ident);
		return EFX_TC_COUNTER_TYPE_MAX;
	}
	header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET);
	/* mae_counter_format.h implies that this offset is fixed, since it
	 * carries on with SOP-based LBNs for the fields in this header
	 */
	if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) {
		if (net_ratelimit())
			netif_err(efx, drv, efx->net_dev,
				  "choked on v2 MAE counter packet (bad header_offset %u"
				  "), counters may be inaccurate\n", header_offset);
		return EFX_TC_COUNTER_TYPE_MAX;
	}
	payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET);
	n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT));

	for (i = 0; i < n_counters; i++) {
		const void *counter_idx_p, *packet_count_p, *byte_count_p;
		u64 packet_count, byte_count;
		u32 counter_idx;

		/* 24-bit field with 32-bit alignment */
		counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX);
		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24);
		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31);
		counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff;
		/* 48-bit field with 16-bit alignment */
		packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT);
		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48);
		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15);
		packet_count = efx_tc_read48((const __le16 *)packet_count_p);
		/* 48-bit field with 16-bit alignment */
		byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT);
		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48);
		BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15);
		byte_count = efx_tc_read48((const __le16 *)byte_count_p);

		if (type == EFX_TC_COUNTER_TYPE_CT) {
			/* CT counters are 1-bit saturating counters used to
			 * update the lastuse time in CT stats. A received CT
			 * counter should have its packet count at 0 and only
			 * the LSB set in its byte count.
			 */
			if (packet_count || byte_count != 1)
				netdev_warn_once(efx->net_dev,
						 "CT counter with inconsistent state (%llu, %llu)\n",
						 packet_count, byte_count);
			/* Do not increment the driver's byte counter */
			byte_count = 0;
		}

		efx_tc_counter_update(efx, type, counter_idx, packet_count,
				      byte_count, mark);
	}
	return type;
}

/* We always swallow the packet, whether successful or not, since it's not
 * a network packet and shouldn't ever be forwarded to the stack.
 * @mark is the generation count for counter allocations.
 */
static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark)
{
	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
	struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue,
						     channel->rx_pkt_index);
	const u8 *data = efx_rx_buf_va(rx_buf);
	struct efx_nic *efx = rx_queue->efx;
	enum efx_tc_counter_type type;
	u8 version;

	/* version is always first byte of packet */
	version = *data;
	switch (version) {
	case 1:
		type = EFX_TC_COUNTER_TYPE_AR;
		efx_tc_rx_version_1(efx, data, mark);
		break;
	case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2
		type = efx_tc_rx_version_2(efx, data, mark);
		break;
	default:
		if (net_ratelimit())
			netif_err(efx, drv, efx->net_dev,
				  "choked on MAE counter packet (bad version %u"
				  "); counters may be inaccurate\n",
				  version);
		goto out;
	}

	if (type < EFX_TC_COUNTER_TYPE_MAX) {
		/* Update seen_gen unconditionally, to avoid a missed wakeup if
		 * we race with efx_mae_stop_counters().
		 */
		efx->tc->seen_gen[type] = mark;
		if (efx->tc->flush_counters &&
		    (s32)(efx->tc->flush_gen[type] - mark) <= 0)
			wake_up(&efx->tc->flush_wq);
	}
out:
	efx_free_rx_buffers(rx_queue, rx_buf, 1);
	channel->rx_pkt_n_frags = 0;
	return true;
}

const struct efx_channel_type efx_tc_channel_type = {
	.handle_no_channel	= efx_tc_handle_no_channel,
	.pre_probe		= efx_tc_probe_channel,
	.start			= efx_tc_start_channel,
	.stop			= efx_tc_stop_channel,
	.post_remove		= efx_tc_remove_channel,
	.get_name		= efx_tc_get_channel_name,
	.receive_raw		= efx_tc_rx,
	.keep_eventq		= true,
	.hide_tx		= true,
};