// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2023, Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
11 #include "tc_conntrack.h"
/* Forward declaration: the flowtable offload callback is referenced by the
 * zone teardown helper before its definition later in the file.
 */
static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
			     void *cb_priv);
18 static const struct rhashtable_params efx_tc_ct_zone_ht_params
= {
19 .key_len
= offsetof(struct efx_tc_ct_zone
, linkage
),
21 .head_offset
= offsetof(struct efx_tc_ct_zone
, linkage
),
24 static const struct rhashtable_params efx_tc_ct_ht_params
= {
25 .key_len
= offsetof(struct efx_tc_ct_entry
, linkage
),
27 .head_offset
= offsetof(struct efx_tc_ct_entry
, linkage
),
30 static void efx_tc_ct_zone_free(void *ptr
, void *arg
)
32 struct efx_tc_ct_zone
*zone
= ptr
;
33 struct efx_nic
*efx
= zone
->efx
;
35 netif_err(efx
, drv
, efx
->net_dev
,
36 "tc ct_zone %u still present at teardown, removing\n",
39 nf_flow_table_offload_del_cb(zone
->nf_ft
, efx_tc_flow_block
, zone
);
43 static void efx_tc_ct_free(void *ptr
, void *arg
)
45 struct efx_tc_ct_entry
*conn
= ptr
;
46 struct efx_nic
*efx
= arg
;
48 netif_err(efx
, drv
, efx
->net_dev
,
49 "tc ct_entry %lx still present at teardown\n",
52 /* We can release the counter, but we can't remove the CT itself
53 * from hardware because the table meta is already gone.
55 efx_tc_flower_release_counter(efx
, conn
->cnt
);
59 int efx_tc_init_conntrack(struct efx_nic
*efx
)
63 rc
= rhashtable_init(&efx
->tc
->ct_zone_ht
, &efx_tc_ct_zone_ht_params
);
66 rc
= rhashtable_init(&efx
->tc
->ct_ht
, &efx_tc_ct_ht_params
);
71 rhashtable_destroy(&efx
->tc
->ct_zone_ht
);
76 /* Only call this in init failure teardown.
77 * Normal exit should fini instead as there may be entries in the table.
79 void efx_tc_destroy_conntrack(struct efx_nic
*efx
)
81 rhashtable_destroy(&efx
->tc
->ct_ht
);
82 rhashtable_destroy(&efx
->tc
->ct_zone_ht
);
85 void efx_tc_fini_conntrack(struct efx_nic
*efx
)
87 rhashtable_free_and_destroy(&efx
->tc
->ct_zone_ht
, efx_tc_ct_zone_free
, NULL
);
88 rhashtable_free_and_destroy(&efx
->tc
->ct_ht
, efx_tc_ct_free
, efx
);
/* Narrow a TCP_FLAG_<flg> constant (a big-endian 32-bit word) to the
 * big-endian 16-bit value obtained from its upper half, for comparison
 * with the flower TCP flags key/mask.
 */
#define EFX_NF_TCP_FLAG(flg)	cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16)
93 static int efx_tc_ct_parse_match(struct efx_nic
*efx
, struct flow_rule
*fr
,
94 struct efx_tc_ct_entry
*conn
)
96 struct flow_dissector
*dissector
= fr
->match
.dissector
;
97 unsigned char ipv
= 0;
100 if (flow_rule_match_key(fr
, FLOW_DISSECTOR_KEY_CONTROL
)) {
101 struct flow_match_control fm
;
103 flow_rule_match_control(fr
, &fm
);
104 if (IS_ALL_ONES(fm
.mask
->addr_type
))
105 switch (fm
.key
->addr_type
) {
106 case FLOW_DISSECTOR_KEY_IPV4_ADDRS
:
109 case FLOW_DISSECTOR_KEY_IPV6_ADDRS
:
118 netif_dbg(efx
, drv
, efx
->net_dev
,
119 "Conntrack missing ipv specification\n");
123 if (dissector
->used_keys
&
124 ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL
) |
125 BIT_ULL(FLOW_DISSECTOR_KEY_BASIC
) |
126 BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS
) |
127 BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS
) |
128 BIT_ULL(FLOW_DISSECTOR_KEY_PORTS
) |
129 BIT_ULL(FLOW_DISSECTOR_KEY_TCP
) |
130 BIT_ULL(FLOW_DISSECTOR_KEY_META
))) {
131 netif_dbg(efx
, drv
, efx
->net_dev
,
132 "Unsupported conntrack keys %#llx\n",
133 dissector
->used_keys
);
137 if (flow_rule_match_key(fr
, FLOW_DISSECTOR_KEY_BASIC
)) {
138 struct flow_match_basic fm
;
140 flow_rule_match_basic(fr
, &fm
);
141 if (!IS_ALL_ONES(fm
.mask
->n_proto
)) {
142 netif_dbg(efx
, drv
, efx
->net_dev
,
143 "Conntrack eth_proto is not exact-match; mask %04x\n",
144 ntohs(fm
.mask
->n_proto
));
147 conn
->eth_proto
= fm
.key
->n_proto
;
148 if (conn
->eth_proto
!= (ipv
== 4 ? htons(ETH_P_IP
)
149 : htons(ETH_P_IPV6
))) {
150 netif_dbg(efx
, drv
, efx
->net_dev
,
151 "Conntrack eth_proto is not IPv%u, is %04x\n",
152 ipv
, ntohs(conn
->eth_proto
));
155 if (!IS_ALL_ONES(fm
.mask
->ip_proto
)) {
156 netif_dbg(efx
, drv
, efx
->net_dev
,
157 "Conntrack ip_proto is not exact-match; mask %02x\n",
161 conn
->ip_proto
= fm
.key
->ip_proto
;
162 switch (conn
->ip_proto
) {
169 netif_dbg(efx
, drv
, efx
->net_dev
,
170 "Conntrack ip_proto not TCP or UDP, is %02x\n",
175 netif_dbg(efx
, drv
, efx
->net_dev
,
176 "Conntrack missing eth_proto, ip_proto\n");
180 if (ipv
== 4 && flow_rule_match_key(fr
, FLOW_DISSECTOR_KEY_IPV4_ADDRS
)) {
181 struct flow_match_ipv4_addrs fm
;
183 flow_rule_match_ipv4_addrs(fr
, &fm
);
184 if (!IS_ALL_ONES(fm
.mask
->src
)) {
185 netif_dbg(efx
, drv
, efx
->net_dev
,
186 "Conntrack ipv4.src is not exact-match; mask %08x\n",
187 ntohl(fm
.mask
->src
));
190 conn
->src_ip
= fm
.key
->src
;
191 if (!IS_ALL_ONES(fm
.mask
->dst
)) {
192 netif_dbg(efx
, drv
, efx
->net_dev
,
193 "Conntrack ipv4.dst is not exact-match; mask %08x\n",
194 ntohl(fm
.mask
->dst
));
197 conn
->dst_ip
= fm
.key
->dst
;
198 } else if (ipv
== 6 && flow_rule_match_key(fr
, FLOW_DISSECTOR_KEY_IPV6_ADDRS
)) {
199 struct flow_match_ipv6_addrs fm
;
201 flow_rule_match_ipv6_addrs(fr
, &fm
);
202 if (!efx_ipv6_addr_all_ones(&fm
.mask
->src
)) {
203 netif_dbg(efx
, drv
, efx
->net_dev
,
204 "Conntrack ipv6.src is not exact-match; mask %pI6\n",
208 conn
->src_ip6
= fm
.key
->src
;
209 if (!efx_ipv6_addr_all_ones(&fm
.mask
->dst
)) {
210 netif_dbg(efx
, drv
, efx
->net_dev
,
211 "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
215 conn
->dst_ip6
= fm
.key
->dst
;
217 netif_dbg(efx
, drv
, efx
->net_dev
,
218 "Conntrack missing IPv%u addrs\n", ipv
);
222 if (flow_rule_match_key(fr
, FLOW_DISSECTOR_KEY_PORTS
)) {
223 struct flow_match_ports fm
;
225 flow_rule_match_ports(fr
, &fm
);
226 if (!IS_ALL_ONES(fm
.mask
->src
)) {
227 netif_dbg(efx
, drv
, efx
->net_dev
,
228 "Conntrack ports.src is not exact-match; mask %04x\n",
229 ntohs(fm
.mask
->src
));
232 conn
->l4_sport
= fm
.key
->src
;
233 if (!IS_ALL_ONES(fm
.mask
->dst
)) {
234 netif_dbg(efx
, drv
, efx
->net_dev
,
235 "Conntrack ports.dst is not exact-match; mask %04x\n",
236 ntohs(fm
.mask
->dst
));
239 conn
->l4_dport
= fm
.key
->dst
;
241 netif_dbg(efx
, drv
, efx
->net_dev
, "Conntrack missing L4 ports\n");
245 if (flow_rule_match_key(fr
, FLOW_DISSECTOR_KEY_TCP
)) {
246 __be16 tcp_interesting_flags
;
247 struct flow_match_tcp fm
;
250 netif_dbg(efx
, drv
, efx
->net_dev
,
251 "Conntrack matching on TCP keys but ipproto is not tcp\n");
254 flow_rule_match_tcp(fr
, &fm
);
255 tcp_interesting_flags
= EFX_NF_TCP_FLAG(SYN
) |
256 EFX_NF_TCP_FLAG(RST
) |
257 EFX_NF_TCP_FLAG(FIN
);
258 /* If any of the tcp_interesting_flags is set, we always
259 * inhibit CT lookup in LHS (so SW can update CT table).
261 if (fm
.key
->flags
& tcp_interesting_flags
) {
262 netif_dbg(efx
, drv
, efx
->net_dev
,
263 "Unsupported conntrack tcp.flags %04x/%04x\n",
264 ntohs(fm
.key
->flags
), ntohs(fm
.mask
->flags
));
267 /* Other TCP flags cannot be filtered at CT */
268 if (fm
.mask
->flags
& ~tcp_interesting_flags
) {
269 netif_dbg(efx
, drv
, efx
->net_dev
,
270 "Unsupported conntrack tcp.flags %04x/%04x\n",
271 ntohs(fm
.key
->flags
), ntohs(fm
.mask
->flags
));
280 * struct efx_tc_ct_mangler_state - tracks which fields have been pedited
282 * @ipv4: IP source or destination addr has been set
283 * @tcpudp: TCP/UDP source or destination port has been set
285 struct efx_tc_ct_mangler_state
{
290 static int efx_tc_ct_mangle(struct efx_nic
*efx
, struct efx_tc_ct_entry
*conn
,
291 const struct flow_action_entry
*fa
,
292 struct efx_tc_ct_mangler_state
*mung
)
294 /* Is this the first mangle we've processed for this rule? */
295 bool first
= !(mung
->ipv4
|| mung
->tcpudp
);
298 switch (fa
->mangle
.htype
) {
299 case FLOW_ACT_MANGLE_HDR_TYPE_IP4
:
300 switch (fa
->mangle
.offset
) {
301 case offsetof(struct iphdr
, daddr
):
304 case offsetof(struct iphdr
, saddr
):
307 conn
->nat_ip
= htonl(fa
->mangle
.val
);
314 case FLOW_ACT_MANGLE_HDR_TYPE_TCP
:
315 case FLOW_ACT_MANGLE_HDR_TYPE_UDP
:
316 /* Both struct tcphdr and struct udphdr start with
319 * so we can use the same code for both.
321 switch (fa
->mangle
.offset
) {
322 case offsetof(struct tcphdr
, dest
):
323 BUILD_BUG_ON(offsetof(struct tcphdr
, dest
) !=
324 offsetof(struct udphdr
, dest
));
327 case offsetof(struct tcphdr
, source
):
328 BUILD_BUG_ON(offsetof(struct tcphdr
, source
) !=
329 offsetof(struct udphdr
, source
));
330 if (~fa
->mangle
.mask
!= 0xffff)
332 conn
->l4_natport
= htons(fa
->mangle
.val
);
342 /* first mangle tells us whether this is SNAT or DNAT;
343 * subsequent mangles must match that
347 else if (conn
->dnat
!= dnat
)
352 static int efx_tc_ct_replace(struct efx_tc_ct_zone
*ct_zone
,
353 struct flow_cls_offload
*tc
)
355 struct flow_rule
*fr
= flow_cls_offload_flow_rule(tc
);
356 struct efx_tc_ct_mangler_state mung
= {};
357 struct efx_tc_ct_entry
*conn
, *old
;
358 struct efx_nic
*efx
= ct_zone
->efx
;
359 const struct flow_action_entry
*fa
;
360 struct efx_tc_counter
*cnt
;
363 if (WARN_ON(!efx
->tc
))
365 if (WARN_ON(!efx
->tc
->up
))
368 conn
= kzalloc(sizeof(*conn
), GFP_USER
);
371 conn
->cookie
= tc
->cookie
;
372 old
= rhashtable_lookup_get_insert_fast(&efx
->tc
->ct_ht
,
374 efx_tc_ct_ht_params
);
379 netif_dbg(efx
, drv
, efx
->net_dev
,
380 "Already offloaded conntrack (cookie %lx)\n", tc
->cookie
);
386 conn
->zone
= ct_zone
;
387 rc
= efx_tc_ct_parse_match(efx
, fr
, conn
);
392 flow_action_for_each(i
, fa
, &fr
->action
) {
394 case FLOW_ACTION_CT_METADATA
:
395 conn
->mark
= fa
->ct_metadata
.mark
;
396 if (memchr_inv(fa
->ct_metadata
.labels
, 0, sizeof(fa
->ct_metadata
.labels
))) {
397 netif_dbg(efx
, drv
, efx
->net_dev
,
398 "Setting CT label not supported\n");
403 case FLOW_ACTION_MANGLE
:
404 if (conn
->eth_proto
!= htons(ETH_P_IP
)) {
405 netif_dbg(efx
, drv
, efx
->net_dev
,
406 "NAT only supported for IPv4\n");
410 rc
= efx_tc_ct_mangle(efx
, conn
, fa
, &mung
);
415 netif_dbg(efx
, drv
, efx
->net_dev
,
416 "Unhandled action %u for conntrack\n", fa
->id
);
422 /* fill in defaults for unmangled values */
424 conn
->nat_ip
= conn
->dnat
? conn
->dst_ip
: conn
->src_ip
;
426 conn
->l4_natport
= conn
->dnat
? conn
->l4_dport
: conn
->l4_sport
;
428 cnt
= efx_tc_flower_allocate_counter(efx
, EFX_TC_COUNTER_TYPE_CT
);
435 rc
= efx_mae_insert_ct(efx
, conn
);
437 netif_dbg(efx
, drv
, efx
->net_dev
,
438 "Failed to insert conntrack, %d\n", rc
);
441 mutex_lock(&ct_zone
->mutex
);
442 list_add_tail(&conn
->list
, &ct_zone
->cts
);
443 mutex_unlock(&ct_zone
->mutex
);
447 efx_tc_flower_release_counter(efx
, conn
->cnt
);
449 rhashtable_remove_fast(&efx
->tc
->ct_ht
, &conn
->linkage
,
450 efx_tc_ct_ht_params
);
455 /* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */
456 static void efx_tc_ct_remove(struct efx_nic
*efx
, struct efx_tc_ct_entry
*conn
)
460 /* Remove it from HW */
461 rc
= efx_mae_remove_ct(efx
, conn
);
462 /* Delete it from SW */
463 rhashtable_remove_fast(&efx
->tc
->ct_ht
, &conn
->linkage
,
464 efx_tc_ct_ht_params
);
466 netif_err(efx
, drv
, efx
->net_dev
,
467 "Failed to remove conntrack %lx from hw, rc %d\n",
470 netif_dbg(efx
, drv
, efx
->net_dev
, "Removed conntrack %lx\n",
475 static void efx_tc_ct_remove_finish(struct efx_nic
*efx
, struct efx_tc_ct_entry
*conn
)
477 /* Remove related CT counter. This is delayed after the conn object we
478 * are working with has been successfully removed. This protects the
479 * counter from being used-after-free inside efx_tc_ct_stats.
481 efx_tc_flower_release_counter(efx
, conn
->cnt
);
485 static int efx_tc_ct_destroy(struct efx_tc_ct_zone
*ct_zone
,
486 struct flow_cls_offload
*tc
)
488 struct efx_nic
*efx
= ct_zone
->efx
;
489 struct efx_tc_ct_entry
*conn
;
491 conn
= rhashtable_lookup_fast(&efx
->tc
->ct_ht
, &tc
->cookie
,
492 efx_tc_ct_ht_params
);
494 netif_warn(efx
, drv
, efx
->net_dev
,
495 "Conntrack %lx not found to remove\n", tc
->cookie
);
499 mutex_lock(&ct_zone
->mutex
);
500 list_del(&conn
->list
);
501 efx_tc_ct_remove(efx
, conn
);
502 mutex_unlock(&ct_zone
->mutex
);
504 efx_tc_ct_remove_finish(efx
, conn
);
508 static int efx_tc_ct_stats(struct efx_tc_ct_zone
*ct_zone
,
509 struct flow_cls_offload
*tc
)
511 struct efx_nic
*efx
= ct_zone
->efx
;
512 struct efx_tc_ct_entry
*conn
;
513 struct efx_tc_counter
*cnt
;
516 conn
= rhashtable_lookup_fast(&efx
->tc
->ct_ht
, &tc
->cookie
,
517 efx_tc_ct_ht_params
);
519 netif_warn(efx
, drv
, efx
->net_dev
,
520 "Conntrack %lx not found for stats\n", tc
->cookie
);
526 spin_lock_bh(&cnt
->lock
);
527 /* Report only last use */
528 flow_stats_update(&tc
->stats
, 0, 0, 0, cnt
->touched
,
529 FLOW_ACTION_HW_STATS_DELAYED
);
530 spin_unlock_bh(&cnt
->lock
);
536 static int efx_tc_flow_block(enum tc_setup_type type
, void *type_data
,
539 struct flow_cls_offload
*tcb
= type_data
;
540 struct efx_tc_ct_zone
*ct_zone
= cb_priv
;
542 if (type
!= TC_SETUP_CLSFLOWER
)
545 switch (tcb
->command
) {
546 case FLOW_CLS_REPLACE
:
547 return efx_tc_ct_replace(ct_zone
, tcb
);
548 case FLOW_CLS_DESTROY
:
549 return efx_tc_ct_destroy(ct_zone
, tcb
);
551 return efx_tc_ct_stats(ct_zone
, tcb
);
559 struct efx_tc_ct_zone
*efx_tc_ct_register_zone(struct efx_nic
*efx
, u16 zone
,
560 struct nf_flowtable
*ct_ft
)
562 struct efx_tc_ct_zone
*ct_zone
, *old
;
565 ct_zone
= kzalloc(sizeof(*ct_zone
), GFP_USER
);
567 return ERR_PTR(-ENOMEM
);
568 ct_zone
->zone
= zone
;
569 old
= rhashtable_lookup_get_insert_fast(&efx
->tc
->ct_zone_ht
,
571 efx_tc_ct_zone_ht_params
);
573 /* don't need our new entry */
575 if (IS_ERR(old
)) /* oh dear, it's actually an error */
576 return ERR_CAST(old
);
577 if (!refcount_inc_not_zero(&old
->ref
))
578 return ERR_PTR(-EAGAIN
);
579 /* existing entry found */
580 WARN_ON_ONCE(old
->nf_ft
!= ct_ft
);
581 netif_dbg(efx
, drv
, efx
->net_dev
,
582 "Found existing ct_zone for %u\n", zone
);
585 ct_zone
->nf_ft
= ct_ft
;
587 INIT_LIST_HEAD(&ct_zone
->cts
);
588 mutex_init(&ct_zone
->mutex
);
589 rc
= nf_flow_table_offload_add_cb(ct_ft
, efx_tc_flow_block
, ct_zone
);
590 netif_dbg(efx
, drv
, efx
->net_dev
, "Adding new ct_zone for %u, rc %d\n",
594 refcount_set(&ct_zone
->ref
, 1);
597 rhashtable_remove_fast(&efx
->tc
->ct_zone_ht
, &ct_zone
->linkage
,
598 efx_tc_ct_zone_ht_params
);
603 void efx_tc_ct_unregister_zone(struct efx_nic
*efx
,
604 struct efx_tc_ct_zone
*ct_zone
)
606 struct efx_tc_ct_entry
*conn
, *next
;
608 if (!refcount_dec_and_test(&ct_zone
->ref
))
609 return; /* still in use */
610 nf_flow_table_offload_del_cb(ct_zone
->nf_ft
, efx_tc_flow_block
, ct_zone
);
611 rhashtable_remove_fast(&efx
->tc
->ct_zone_ht
, &ct_zone
->linkage
,
612 efx_tc_ct_zone_ht_params
);
613 mutex_lock(&ct_zone
->mutex
);
614 list_for_each_entry(conn
, &ct_zone
->cts
, list
)
615 efx_tc_ct_remove(efx
, conn
);
617 /* need to use _safe because efx_tc_ct_remove_finish() frees conn */
618 list_for_each_entry_safe(conn
, next
, &ct_zone
->cts
, list
)
619 efx_tc_ct_remove_finish(efx
, conn
);
620 mutex_unlock(&ct_zone
->mutex
);
621 mutex_destroy(&ct_zone
->mutex
);
622 netif_dbg(efx
, drv
, efx
->net_dev
, "Removed ct_zone for %u\n",