Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / drivers / net / ethernet / sfc / tc_conntrack.c
blobd90206f27161e4e3cf7d3bae93cb648b14f9584a
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2023, Advanced Micro Devices, Inc.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
11 #include "tc_conntrack.h"
12 #include "tc.h"
13 #include "mae.h"
/* Forward declaration: efx_tc_ct_zone_free() passes this callback to
 * nf_flow_table_offload_del_cb() before the definition appears in the file.
 */
static int efx_tc_flow_block(enum tc_setup_type type,
			     void *type_data,
			     void *cb_priv);
18 static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
19 .key_len = offsetof(struct efx_tc_ct_zone, linkage),
20 .key_offset = 0,
21 .head_offset = offsetof(struct efx_tc_ct_zone, linkage),
24 static const struct rhashtable_params efx_tc_ct_ht_params = {
25 .key_len = offsetof(struct efx_tc_ct_entry, linkage),
26 .key_offset = 0,
27 .head_offset = offsetof(struct efx_tc_ct_entry, linkage),
30 static void efx_tc_ct_zone_free(void *ptr, void *arg)
32 struct efx_tc_ct_zone *zone = ptr;
33 struct efx_nic *efx = zone->efx;
35 netif_err(efx, drv, efx->net_dev,
36 "tc ct_zone %u still present at teardown, removing\n",
37 zone->zone);
39 nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone);
40 kfree(zone);
43 static void efx_tc_ct_free(void *ptr, void *arg)
45 struct efx_tc_ct_entry *conn = ptr;
46 struct efx_nic *efx = arg;
48 netif_err(efx, drv, efx->net_dev,
49 "tc ct_entry %lx still present at teardown\n",
50 conn->cookie);
52 /* We can release the counter, but we can't remove the CT itself
53 * from hardware because the table meta is already gone.
55 efx_tc_flower_release_counter(efx, conn->cnt);
56 kfree(conn);
59 int efx_tc_init_conntrack(struct efx_nic *efx)
61 int rc;
63 rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
64 if (rc < 0)
65 goto fail_ct_zone_ht;
66 rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
67 if (rc < 0)
68 goto fail_ct_ht;
69 return 0;
70 fail_ct_ht:
71 rhashtable_destroy(&efx->tc->ct_zone_ht);
72 fail_ct_zone_ht:
73 return rc;
76 /* Only call this in init failure teardown.
77 * Normal exit should fini instead as there may be entries in the table.
79 void efx_tc_destroy_conntrack(struct efx_nic *efx)
81 rhashtable_destroy(&efx->tc->ct_ht);
82 rhashtable_destroy(&efx->tc->ct_zone_ht);
85 void efx_tc_fini_conntrack(struct efx_nic *efx)
87 rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
88 rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
/* Take the upper 16 bits (in CPU order) of the big-endian 32-bit constant
 * TCP_FLAG_<flg> and return them as a big-endian 16-bit value.
 */
#define EFX_NF_TCP_FLAG(flg)	\
	(cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16))
93 static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
94 struct efx_tc_ct_entry *conn)
96 struct flow_dissector *dissector = fr->match.dissector;
97 unsigned char ipv = 0;
98 bool tcp = false;
100 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
101 struct flow_match_control fm;
103 flow_rule_match_control(fr, &fm);
104 if (IS_ALL_ONES(fm.mask->addr_type))
105 switch (fm.key->addr_type) {
106 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
107 ipv = 4;
108 break;
109 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
110 ipv = 6;
111 break;
112 default:
113 break;
117 if (!ipv) {
118 netif_dbg(efx, drv, efx->net_dev,
119 "Conntrack missing ipv specification\n");
120 return -EOPNOTSUPP;
123 if (dissector->used_keys &
124 ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
125 BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
126 BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
127 BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
128 BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
129 BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
130 BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
131 netif_dbg(efx, drv, efx->net_dev,
132 "Unsupported conntrack keys %#llx\n",
133 dissector->used_keys);
134 return -EOPNOTSUPP;
137 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
138 struct flow_match_basic fm;
140 flow_rule_match_basic(fr, &fm);
141 if (!IS_ALL_ONES(fm.mask->n_proto)) {
142 netif_dbg(efx, drv, efx->net_dev,
143 "Conntrack eth_proto is not exact-match; mask %04x\n",
144 ntohs(fm.mask->n_proto));
145 return -EOPNOTSUPP;
147 conn->eth_proto = fm.key->n_proto;
148 if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
149 : htons(ETH_P_IPV6))) {
150 netif_dbg(efx, drv, efx->net_dev,
151 "Conntrack eth_proto is not IPv%u, is %04x\n",
152 ipv, ntohs(conn->eth_proto));
153 return -EOPNOTSUPP;
155 if (!IS_ALL_ONES(fm.mask->ip_proto)) {
156 netif_dbg(efx, drv, efx->net_dev,
157 "Conntrack ip_proto is not exact-match; mask %02x\n",
158 fm.mask->ip_proto);
159 return -EOPNOTSUPP;
161 conn->ip_proto = fm.key->ip_proto;
162 switch (conn->ip_proto) {
163 case IPPROTO_TCP:
164 tcp = true;
165 break;
166 case IPPROTO_UDP:
167 break;
168 default:
169 netif_dbg(efx, drv, efx->net_dev,
170 "Conntrack ip_proto not TCP or UDP, is %02x\n",
171 conn->ip_proto);
172 return -EOPNOTSUPP;
174 } else {
175 netif_dbg(efx, drv, efx->net_dev,
176 "Conntrack missing eth_proto, ip_proto\n");
177 return -EOPNOTSUPP;
180 if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
181 struct flow_match_ipv4_addrs fm;
183 flow_rule_match_ipv4_addrs(fr, &fm);
184 if (!IS_ALL_ONES(fm.mask->src)) {
185 netif_dbg(efx, drv, efx->net_dev,
186 "Conntrack ipv4.src is not exact-match; mask %08x\n",
187 ntohl(fm.mask->src));
188 return -EOPNOTSUPP;
190 conn->src_ip = fm.key->src;
191 if (!IS_ALL_ONES(fm.mask->dst)) {
192 netif_dbg(efx, drv, efx->net_dev,
193 "Conntrack ipv4.dst is not exact-match; mask %08x\n",
194 ntohl(fm.mask->dst));
195 return -EOPNOTSUPP;
197 conn->dst_ip = fm.key->dst;
198 } else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
199 struct flow_match_ipv6_addrs fm;
201 flow_rule_match_ipv6_addrs(fr, &fm);
202 if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
203 netif_dbg(efx, drv, efx->net_dev,
204 "Conntrack ipv6.src is not exact-match; mask %pI6\n",
205 &fm.mask->src);
206 return -EOPNOTSUPP;
208 conn->src_ip6 = fm.key->src;
209 if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
210 netif_dbg(efx, drv, efx->net_dev,
211 "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
212 &fm.mask->dst);
213 return -EOPNOTSUPP;
215 conn->dst_ip6 = fm.key->dst;
216 } else {
217 netif_dbg(efx, drv, efx->net_dev,
218 "Conntrack missing IPv%u addrs\n", ipv);
219 return -EOPNOTSUPP;
222 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
223 struct flow_match_ports fm;
225 flow_rule_match_ports(fr, &fm);
226 if (!IS_ALL_ONES(fm.mask->src)) {
227 netif_dbg(efx, drv, efx->net_dev,
228 "Conntrack ports.src is not exact-match; mask %04x\n",
229 ntohs(fm.mask->src));
230 return -EOPNOTSUPP;
232 conn->l4_sport = fm.key->src;
233 if (!IS_ALL_ONES(fm.mask->dst)) {
234 netif_dbg(efx, drv, efx->net_dev,
235 "Conntrack ports.dst is not exact-match; mask %04x\n",
236 ntohs(fm.mask->dst));
237 return -EOPNOTSUPP;
239 conn->l4_dport = fm.key->dst;
240 } else {
241 netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
242 return -EOPNOTSUPP;
245 if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
246 __be16 tcp_interesting_flags;
247 struct flow_match_tcp fm;
249 if (!tcp) {
250 netif_dbg(efx, drv, efx->net_dev,
251 "Conntrack matching on TCP keys but ipproto is not tcp\n");
252 return -EOPNOTSUPP;
254 flow_rule_match_tcp(fr, &fm);
255 tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
256 EFX_NF_TCP_FLAG(RST) |
257 EFX_NF_TCP_FLAG(FIN);
258 /* If any of the tcp_interesting_flags is set, we always
259 * inhibit CT lookup in LHS (so SW can update CT table).
261 if (fm.key->flags & tcp_interesting_flags) {
262 netif_dbg(efx, drv, efx->net_dev,
263 "Unsupported conntrack tcp.flags %04x/%04x\n",
264 ntohs(fm.key->flags), ntohs(fm.mask->flags));
265 return -EOPNOTSUPP;
267 /* Other TCP flags cannot be filtered at CT */
268 if (fm.mask->flags & ~tcp_interesting_flags) {
269 netif_dbg(efx, drv, efx->net_dev,
270 "Unsupported conntrack tcp.flags %04x/%04x\n",
271 ntohs(fm.key->flags), ntohs(fm.mask->flags));
272 return -EOPNOTSUPP;
276 return 0;
280 * struct efx_tc_ct_mangler_state - tracks which fields have been pedited
282 * @ipv4: IP source or destination addr has been set
283 * @tcpudp: TCP/UDP source or destination port has been set
285 struct efx_tc_ct_mangler_state {
286 u8 ipv4:1;
287 u8 tcpudp:1;
290 static int efx_tc_ct_mangle(struct efx_nic *efx, struct efx_tc_ct_entry *conn,
291 const struct flow_action_entry *fa,
292 struct efx_tc_ct_mangler_state *mung)
294 /* Is this the first mangle we've processed for this rule? */
295 bool first = !(mung->ipv4 || mung->tcpudp);
296 bool dnat = false;
298 switch (fa->mangle.htype) {
299 case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
300 switch (fa->mangle.offset) {
301 case offsetof(struct iphdr, daddr):
302 dnat = true;
303 fallthrough;
304 case offsetof(struct iphdr, saddr):
305 if (fa->mangle.mask)
306 return -EOPNOTSUPP;
307 conn->nat_ip = htonl(fa->mangle.val);
308 mung->ipv4 = 1;
309 break;
310 default:
311 return -EOPNOTSUPP;
313 break;
314 case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
315 case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
316 /* Both struct tcphdr and struct udphdr start with
317 * __be16 source;
318 * __be16 dest;
319 * so we can use the same code for both.
321 switch (fa->mangle.offset) {
322 case offsetof(struct tcphdr, dest):
323 BUILD_BUG_ON(offsetof(struct tcphdr, dest) !=
324 offsetof(struct udphdr, dest));
325 dnat = true;
326 fallthrough;
327 case offsetof(struct tcphdr, source):
328 BUILD_BUG_ON(offsetof(struct tcphdr, source) !=
329 offsetof(struct udphdr, source));
330 if (~fa->mangle.mask != 0xffff)
331 return -EOPNOTSUPP;
332 conn->l4_natport = htons(fa->mangle.val);
333 mung->tcpudp = 1;
334 break;
335 default:
336 return -EOPNOTSUPP;
338 break;
339 default:
340 return -EOPNOTSUPP;
342 /* first mangle tells us whether this is SNAT or DNAT;
343 * subsequent mangles must match that
345 if (first)
346 conn->dnat = dnat;
347 else if (conn->dnat != dnat)
348 return -EOPNOTSUPP;
349 return 0;
352 static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
353 struct flow_cls_offload *tc)
355 struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
356 struct efx_tc_ct_mangler_state mung = {};
357 struct efx_tc_ct_entry *conn, *old;
358 struct efx_nic *efx = ct_zone->efx;
359 const struct flow_action_entry *fa;
360 struct efx_tc_counter *cnt;
361 int rc, i;
363 if (WARN_ON(!efx->tc))
364 return -ENETDOWN;
365 if (WARN_ON(!efx->tc->up))
366 return -ENETDOWN;
368 conn = kzalloc(sizeof(*conn), GFP_USER);
369 if (!conn)
370 return -ENOMEM;
371 conn->cookie = tc->cookie;
372 old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
373 &conn->linkage,
374 efx_tc_ct_ht_params);
375 if (IS_ERR(old)) {
376 rc = PTR_ERR(old);
377 goto release;
378 } else if (old) {
379 netif_dbg(efx, drv, efx->net_dev,
380 "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
381 rc = -EEXIST;
382 goto release;
385 /* Parse match */
386 conn->zone = ct_zone;
387 rc = efx_tc_ct_parse_match(efx, fr, conn);
388 if (rc)
389 goto release;
391 /* Parse actions */
392 flow_action_for_each(i, fa, &fr->action) {
393 switch (fa->id) {
394 case FLOW_ACTION_CT_METADATA:
395 conn->mark = fa->ct_metadata.mark;
396 if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
397 netif_dbg(efx, drv, efx->net_dev,
398 "Setting CT label not supported\n");
399 rc = -EOPNOTSUPP;
400 goto release;
402 break;
403 case FLOW_ACTION_MANGLE:
404 if (conn->eth_proto != htons(ETH_P_IP)) {
405 netif_dbg(efx, drv, efx->net_dev,
406 "NAT only supported for IPv4\n");
407 rc = -EOPNOTSUPP;
408 goto release;
410 rc = efx_tc_ct_mangle(efx, conn, fa, &mung);
411 if (rc)
412 goto release;
413 break;
414 default:
415 netif_dbg(efx, drv, efx->net_dev,
416 "Unhandled action %u for conntrack\n", fa->id);
417 rc = -EOPNOTSUPP;
418 goto release;
422 /* fill in defaults for unmangled values */
423 if (!mung.ipv4)
424 conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
425 if (!mung.tcpudp)
426 conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
428 cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
429 if (IS_ERR(cnt)) {
430 rc = PTR_ERR(cnt);
431 goto release;
433 conn->cnt = cnt;
435 rc = efx_mae_insert_ct(efx, conn);
436 if (rc) {
437 netif_dbg(efx, drv, efx->net_dev,
438 "Failed to insert conntrack, %d\n", rc);
439 goto release;
441 mutex_lock(&ct_zone->mutex);
442 list_add_tail(&conn->list, &ct_zone->cts);
443 mutex_unlock(&ct_zone->mutex);
444 return 0;
445 release:
446 if (conn->cnt)
447 efx_tc_flower_release_counter(efx, conn->cnt);
448 if (!old)
449 rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
450 efx_tc_ct_ht_params);
451 kfree(conn);
452 return rc;
455 /* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */
456 static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
458 int rc;
460 /* Remove it from HW */
461 rc = efx_mae_remove_ct(efx, conn);
462 /* Delete it from SW */
463 rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
464 efx_tc_ct_ht_params);
465 if (rc) {
466 netif_err(efx, drv, efx->net_dev,
467 "Failed to remove conntrack %lx from hw, rc %d\n",
468 conn->cookie, rc);
469 } else {
470 netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
471 conn->cookie);
475 static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
477 /* Remove related CT counter. This is delayed after the conn object we
478 * are working with has been successfully removed. This protects the
479 * counter from being used-after-free inside efx_tc_ct_stats.
481 efx_tc_flower_release_counter(efx, conn->cnt);
482 kfree(conn);
485 static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
486 struct flow_cls_offload *tc)
488 struct efx_nic *efx = ct_zone->efx;
489 struct efx_tc_ct_entry *conn;
491 conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
492 efx_tc_ct_ht_params);
493 if (!conn) {
494 netif_warn(efx, drv, efx->net_dev,
495 "Conntrack %lx not found to remove\n", tc->cookie);
496 return -ENOENT;
499 mutex_lock(&ct_zone->mutex);
500 list_del(&conn->list);
501 efx_tc_ct_remove(efx, conn);
502 mutex_unlock(&ct_zone->mutex);
503 synchronize_rcu();
504 efx_tc_ct_remove_finish(efx, conn);
505 return 0;
508 static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
509 struct flow_cls_offload *tc)
511 struct efx_nic *efx = ct_zone->efx;
512 struct efx_tc_ct_entry *conn;
513 struct efx_tc_counter *cnt;
515 rcu_read_lock();
516 conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
517 efx_tc_ct_ht_params);
518 if (!conn) {
519 netif_warn(efx, drv, efx->net_dev,
520 "Conntrack %lx not found for stats\n", tc->cookie);
521 rcu_read_unlock();
522 return -ENOENT;
525 cnt = conn->cnt;
526 spin_lock_bh(&cnt->lock);
527 /* Report only last use */
528 flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
529 FLOW_ACTION_HW_STATS_DELAYED);
530 spin_unlock_bh(&cnt->lock);
531 rcu_read_unlock();
533 return 0;
536 static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
537 void *cb_priv)
539 struct flow_cls_offload *tcb = type_data;
540 struct efx_tc_ct_zone *ct_zone = cb_priv;
542 if (type != TC_SETUP_CLSFLOWER)
543 return -EOPNOTSUPP;
545 switch (tcb->command) {
546 case FLOW_CLS_REPLACE:
547 return efx_tc_ct_replace(ct_zone, tcb);
548 case FLOW_CLS_DESTROY:
549 return efx_tc_ct_destroy(ct_zone, tcb);
550 case FLOW_CLS_STATS:
551 return efx_tc_ct_stats(ct_zone, tcb);
552 default:
553 break;
556 return -EOPNOTSUPP;
559 struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
560 struct nf_flowtable *ct_ft)
562 struct efx_tc_ct_zone *ct_zone, *old;
563 int rc;
565 ct_zone = kzalloc(sizeof(*ct_zone), GFP_USER);
566 if (!ct_zone)
567 return ERR_PTR(-ENOMEM);
568 ct_zone->zone = zone;
569 old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht,
570 &ct_zone->linkage,
571 efx_tc_ct_zone_ht_params);
572 if (old) {
573 /* don't need our new entry */
574 kfree(ct_zone);
575 if (IS_ERR(old)) /* oh dear, it's actually an error */
576 return ERR_CAST(old);
577 if (!refcount_inc_not_zero(&old->ref))
578 return ERR_PTR(-EAGAIN);
579 /* existing entry found */
580 WARN_ON_ONCE(old->nf_ft != ct_ft);
581 netif_dbg(efx, drv, efx->net_dev,
582 "Found existing ct_zone for %u\n", zone);
583 return old;
585 ct_zone->nf_ft = ct_ft;
586 ct_zone->efx = efx;
587 INIT_LIST_HEAD(&ct_zone->cts);
588 mutex_init(&ct_zone->mutex);
589 rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, ct_zone);
590 netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
591 zone, rc);
592 if (rc < 0)
593 goto fail;
594 refcount_set(&ct_zone->ref, 1);
595 return ct_zone;
596 fail:
597 rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
598 efx_tc_ct_zone_ht_params);
599 kfree(ct_zone);
600 return ERR_PTR(rc);
603 void efx_tc_ct_unregister_zone(struct efx_nic *efx,
604 struct efx_tc_ct_zone *ct_zone)
606 struct efx_tc_ct_entry *conn, *next;
608 if (!refcount_dec_and_test(&ct_zone->ref))
609 return; /* still in use */
610 nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
611 rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
612 efx_tc_ct_zone_ht_params);
613 mutex_lock(&ct_zone->mutex);
614 list_for_each_entry(conn, &ct_zone->cts, list)
615 efx_tc_ct_remove(efx, conn);
616 synchronize_rcu();
617 /* need to use _safe because efx_tc_ct_remove_finish() frees conn */
618 list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
619 efx_tc_ct_remove_finish(efx, conn);
620 mutex_unlock(&ct_zone->mutex);
621 mutex_destroy(&ct_zone->mutex);
622 netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
623 ct_zone->zone);
624 kfree(ct_zone);