// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  SR-IPv6 implementation
 *
 *  Authors:
 *  David Lebrun <david.lebrun@uclouvain.be>
 *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
 */

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_local.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
#include <net/ip_tunnels.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
struct seg6_local_lwt;

/* callbacks used for customizing the creation and destruction of a behavior */
struct seg6_local_lwtunnel_ops {
	int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
			   struct netlink_ext_ack *extack);
	void (*destroy_state)(struct seg6_local_lwt *slwt);
};
struct seg6_action_desc {
	int action;
	unsigned long attrs;

	/* The optattrs field is used for specifying all the optional
	 * attributes supported by a specific behavior.
	 * It means that if one of these attributes is not provided in the
	 * netlink message during the behavior creation, no errors will be
	 * returned to the userspace.
	 *
	 * Each attribute can be only of two types (mutually exclusive):
	 *  1) required or 2) optional.
	 * Every user MUST obey this rule! If you set an attribute as
	 * required, the same attribute CANNOT also be set as optional, and
	 * vice versa.
	 */
	unsigned long optattrs;

	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	int static_headroom;

	struct seg6_local_lwtunnel_ops slwt_ops;
};
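/* For illustration only (not part of the original source): attribute IDs are
 * encoded as bit positions in the attrs/optattrs bitmasks, so a behavior that
 * requires a routing table and optionally accepts a VRF table would be
 * described along the lines of:
 *
 *	.attrs		= (1 << SEG6_LOCAL_TABLE),
 *	.optattrs	= (1 << SEG6_LOCAL_VRFTABLE),
 *
 * as done in the seg6_action_table entries further below.
 */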
struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};

enum seg6_end_dt_mode {
	DT_INVALID_MODE	= -EINVAL,
	DT_LEGACY_MODE,
	DT_VRF_MODE,
};

struct seg6_end_dt_info {
	enum seg6_end_dt_mode mode;

	struct net *net;
	/* VRF device associated to the routing table used by the SRv6
	 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
	 */
	int vrf_ifindex;
	int vrf_table;

	/* tunneled packet proto and family (IPv4 or IPv6) */
	__be16 proto;
	u16 family;
	int hdrlen;
};

struct seg6_local_lwt {
	int action;
	struct ipv6_sr_hdr *srh;
	int table;
	struct in_addr nh4;
	struct in6_addr nh6;
	int iif;
	int oif;
	struct bpf_lwt_prog bpf;
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct seg6_end_dt_info dt_info;
#endif

	int headroom;
	struct seg6_action_desc *desc;
	/* unlike the required attrs, we have to track the optional attributes
	 * that have been effectively parsed.
	 */
	unsigned long parsed_optattrs;
};
static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct seg6_local_lwt *)lwt->data;
}

static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
{
	struct ipv6_sr_hdr *srh;
	int len, srhoff = 0;

	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
		return NULL;

	if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
		return NULL;

	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

	len = (srh->hdrlen + 1) << 3;

	if (!pskb_may_pull(skb, srhoff + len))
		return NULL;

	/* note that pskb_may_pull may change pointers in header;
	 * for this reason it is necessary to reload them when needed.
	 */
	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

	if (!seg6_validate_srh(srh, len, true))
		return NULL;

	return srh;
}
static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
{
	struct ipv6_sr_hdr *srh;

	srh = get_srh(skb);
	if (!srh)
		return NULL;

	if (srh->segments_left == 0)
		return NULL;

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (!seg6_hmac_validate_skb(skb))
		return NULL;
#endif

	return srh;
}
static bool decap_and_validate(struct sk_buff *skb, int proto)
{
	struct ipv6_sr_hdr *srh;
	unsigned int off = 0;

	srh = get_srh(skb);
	if (srh && srh->segments_left > 0)
		return false;

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (srh && !seg6_hmac_validate_skb(skb))
		return false;
#endif

	if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
		return false;

	if (!pskb_pull(skb, off))
		return false;

	skb_postpull_rcsum(skb, skb_network_header(skb), off);

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	if (iptunnel_pull_offloads(skb))
		return false;

	return true;
}
static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
{
	struct in6_addr *addr;

	srh->segments_left--;
	addr = srh->segments + srh->segments_left;
	*daddr = *addr;
}
static int
seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			u32 tbl_id, bool local_delivery)
{
	struct net *net = dev_net(skb->dev);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;
	struct flowi6 fl6;
	int dev_flags = 0;

	fl6.flowi6_iif = skb->dev->ifindex;
	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
	fl6.saddr = hdr->saddr;
	fl6.flowlabel = ip6_flowinfo(hdr);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = hdr->nexthdr;

	if (nhaddr)
		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;

	if (!tbl_id) {
		dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
	} else {
		struct fib6_table *table;

		table = fib6_get_table(net, tbl_id);
		if (!table)
			goto out;

		rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
		dst = &rt->dst;
	}

	/* we want to discard traffic destined for local packet processing,
	 * if @local_delivery is set to false.
	 */
	if (!local_delivery)
		dev_flags |= IFF_LOOPBACK;

	if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
		dst_release(dst);
		dst = NULL;
	}

out:
	if (!dst) {
		rt = net->ipv6.ip6_blk_hole_entry;
		dst = &rt->dst;
		dst_hold(dst);
	}

	skb_dst_set(skb, dst);
	return dst->error;
}

int seg6_lookup_nexthop(struct sk_buff *skb,
			struct in6_addr *nhaddr, u32 tbl_id)
{
	return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
}
/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
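/* Illustrative usage, not taken from this file: with a recent iproute2, an
 * instance of the plain End behavior is typically attached to a local SID
 * route along these lines (addresses and device name are placeholders):
 *
 *	ip -6 route add 2001:db8::100/128 encap seg6local action End dev eth0
 */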
/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	seg6_lookup_nexthop(skb, &slwt->nh6, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}

static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	seg6_lookup_nexthop(skb, NULL, slwt->table);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
/* decapsulate and forward inner L2 frame on specified interface */
static int input_action_end_dx2(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *odev;
	struct ethhdr *eth;

	if (!decap_and_validate(skb, IPPROTO_ETHERNET))
		goto drop;

	if (!pskb_may_pull(skb, ETH_HLEN))
		goto drop;

	skb_reset_mac_header(skb);
	eth = (struct ethhdr *)skb->data;

	/* To determine the frame's protocol, we assume it is 802.3. This avoids
	 * a call to eth_type_trans(), which is not really relevant for our
	 * use case.
	 */
	if (!eth_proto_is_802_3(eth->h_proto))
		goto drop;

	odev = dev_get_by_index_rcu(net, slwt->oif);
	if (!odev)
		goto drop;

	/* As we accept Ethernet frames, make sure the egress device is of
	 * the correct type.
	 */
	if (odev->type != ARPHRD_ETHER)
		goto drop;

	if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
		goto drop;

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto drop;

	skb_forward_csum(skb);

	if (skb->len - ETH_HLEN > odev->mtu)
		goto drop;

	skb->dev = odev;
	skb->protocol = eth->h_proto;

	return dev_queue_xmit(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
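/* Illustrative usage, not taken from this file: End.DX2 cross-connects the
 * decapsulated L2 frame to the interface named by the oif attribute, e.g.
 * (SID and device names are placeholders):
 *
 *	ip -6 route add 2001:db8::200/128 encap seg6local action End.DX2 \
 *		oif veth1 dev eth0
 */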
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct in6_addr *nhaddr = NULL;

	/* this function accepts IPv6 encapsulated packets, with either
	 * an SRH with SL=0, or no SRH.
	 */

	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

	/* The inner packet is not associated to any local interface,
	 * so we do not call netif_rx().
	 *
	 * If slwt->nh6 is set to ::, then lookup the nexthop for the
	 * inner packet's DA. Otherwise, use the specified nexthop.
	 */
	if (!ipv6_addr_any(&slwt->nh6))
		nhaddr = &slwt->nh6;

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_nexthop(skb, nhaddr, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
static int input_action_end_dx4(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct iphdr *iph;
	__be32 nhaddr;
	int err;

	if (!decap_and_validate(skb, IPPROTO_IPIP))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto drop;

	skb->protocol = htons(ETH_P_IP);

	iph = ip_hdr(skb);

	nhaddr = slwt->nh4.s_addr ?: iph->daddr;

	skb_dst_drop(skb);

	skb_set_transport_header(skb, sizeof(struct iphdr));

	err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
	if (err)
		goto drop;

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
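/* Illustrative usage, not taken from this file: End.DX4 and End.DX6 forward
 * the decapsulated packet towards an explicit nexthop, e.g. (addresses and
 * device names are placeholders):
 *
 *	ip -6 route add 2001:db8::300/128 encap seg6local action End.DX4 \
 *		nh4 192.0.2.1 dev eth0
 *	ip -6 route add 2001:db8::301/128 encap seg6local action End.DX6 \
 *		nh6 2001:db8:1::1 dev eth0
 */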
#ifdef CONFIG_NET_L3_MASTER_DEV
static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
{
	const struct nl_info *nli = &fib6_cfg->fc_nlinfo;

	return nli->nl_net;
}
static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
				   u16 family, struct netlink_ext_ack *extack)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;
	int vrf_ifindex;
	struct net *net;

	net = fib6_config_get_net(cfg);

	/* note that vrf_table was already set by parse_nla_vrftable() */
	vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
							info->vrf_table);
	if (vrf_ifindex < 0) {
		if (vrf_ifindex == -EPERM) {
			NL_SET_ERR_MSG(extack,
				       "Strict mode for VRF is disabled");
		} else if (vrf_ifindex == -ENODEV) {
			NL_SET_ERR_MSG(extack,
				       "Table has no associated VRF device");
		} else {
			pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
				 vrf_ifindex);
		}

		return vrf_ifindex;
	}

	info->net = net;
	info->vrf_ifindex = vrf_ifindex;

	switch (family) {
	case AF_INET:
		info->proto = htons(ETH_P_IP);
		info->hdrlen = sizeof(struct iphdr);
		break;
	case AF_INET6:
		info->proto = htons(ETH_P_IPV6);
		info->hdrlen = sizeof(struct ipv6hdr);
		break;
	default:
		return -EINVAL;
	}

	info->family = family;
	info->mode = DT_VRF_MODE;

	return 0;
}
/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
 * routes the IPv4/IPv6 packet by looking at the configured routing table.
 *
 * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
 * Routing Header packets) from several interfaces and the outer IPv6
 * destination address (DA) is used for retrieving the specific instance of the
 * End.DT4/DT6 behavior that should process the packets.
 *
 * However, the inner IPv4/IPv6 packet is not really bound to any receiving
 * interface and thus the End.DT4/DT6 sets the VRF (associated with the
 * corresponding routing table) as the *receiving* interface.
 * In other words, the End.DT4/DT6 processes a packet as if it has been received
 * directly by the VRF (and not by one of its slave devices, if any).
 * In this way, the VRF interface is used for routing the IPv4/IPv6 packet
 * according to the routing table configured by the End.DT4/DT6 instance.
 *
 * This design allows you to get some interesting features like:
 *  1) the statistics on rx packets;
 *  2) the possibility to install a packet sniffer on the receiving interface
 *     (the VRF one) for looking at the incoming packets;
 *  3) the possibility to leverage the netfilter prerouting hook for the inner
 *     IPv4/IPv6 packet.
 *
 * This function returns:
 *  - the sk_buff, when the VRF rcv handler has processed the packet correctly;
 *  - NULL, when the skb is consumed by the VRF rcv handler;
 *  - a pointer which encodes a negative error number in case of error.
 *    Note that in this case, the function takes care of freeing the skb.
 */
static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
				      struct net_device *dev)
{
	/* based on l3mdev_ip_rcv; we are only interested in the master */
	if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
		goto drop;

	if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
		goto drop;

	/* the decap packet IPv4/IPv6 does not come with any mac header info.
	 * We must unset the mac header to allow the VRF device to rebuild it,
	 * just in case there is a sniffer attached on the device.
	 */
	skb_unset_mac_header(skb);

	skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
	if (!skb)
		/* the skb buffer was consumed by the handler */
		return NULL;

	/* when a packet is received by a VRF or by one of its slaves, the
	 * master device reference is set into the skb.
	 */
	if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
		goto drop;

	return skb;

drop:
	kfree_skb(skb);
	return ERR_PTR(-EINVAL);
}
static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
					     struct seg6_end_dt_info *info)
{
	int vrf_ifindex = info->vrf_ifindex;
	struct net *net = info->net;

	if (unlikely(vrf_ifindex < 0))
		goto error;

	if (unlikely(!net_eq(dev_net(skb->dev), net)))
		goto error;

	return dev_get_by_index_rcu(net, vrf_ifindex);

error:
	return NULL;
}
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
				       struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;
	struct net_device *vrf;

	vrf = end_dt_get_vrf_rcu(skb, info);
	if (!vrf)
		goto drop;

	skb->protocol = info->proto;

	skb_dst_drop(skb);

	skb_set_transport_header(skb, info->hdrlen);

	return end_dt_vrf_rcv(skb, info->family, vrf);

drop:
	kfree_skb(skb);
	return ERR_PTR(-EINVAL);
}
static int input_action_end_dt4(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct iphdr *iph;
	int err;

	if (!decap_and_validate(skb, IPPROTO_IPIP))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto drop;

	skb = end_dt_vrf_core(skb, slwt);
	if (!skb)
		/* packet has been processed and consumed by the VRF */
		return 0;

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	iph = ip_hdr(skb);

	err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
	if (unlikely(err))
		goto drop;

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
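/* Illustrative usage, not taken from this file: End.DT4 requires a VRF and is
 * typically configured through the vrftable attribute, e.g. (table id and
 * device names are placeholders):
 *
 *	ip -6 route add 2001:db8::400/128 encap seg6local action End.DT4 \
 *		vrftable 100 dev vrf100
 */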
static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
			      struct netlink_ext_ack *extack)
{
	return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
}
static enum
seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
{
	unsigned long parsed_optattrs = slwt->parsed_optattrs;
	bool legacy, vrfmode;

	legacy	= !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE));
	vrfmode	= !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE));

	if (!(legacy ^ vrfmode))
		/* both are absent or present: invalid DT6 mode */
		return DT_INVALID_MODE;

	return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
}

static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;

	return info->mode;
}
static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
			      struct netlink_ext_ack *extack)
{
	enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
	struct seg6_end_dt_info *info = &slwt->dt_info;

	switch (mode) {
	case DT_LEGACY_MODE:
		info->mode = DT_LEGACY_MODE;
		return 0;
	case DT_VRF_MODE:
		return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
	default:
		NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
		return -EINVAL;
	}
}
#endif
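/* Illustrative usage, not taken from this file: End.DT6 accepts either the
 * legacy table mode or the VRF mode, but not both at once, e.g. (table ids
 * and device names are placeholders):
 *
 *	ip -6 route add 2001:db8::600/128 encap seg6local action End.DT6 \
 *		table 100 dev eth0
 *	ip -6 route add 2001:db8::601/128 encap seg6local action End.DT6 \
 *		vrftable 100 dev vrf100
 */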
static int input_action_end_dt6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

#ifdef CONFIG_NET_L3_MASTER_DEV
	if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
		goto legacy_mode;

	skb = end_dt_vrf_core(skb, slwt);
	if (!skb)
		/* packet has been processed and consumed by the VRF */
		return 0;

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* note: this time we do not need to specify the table because the VRF
	 * takes care of selecting the correct table.
	 */
	seg6_lookup_any_nexthop(skb, NULL, 0, true);

	return dst_input(skb);

legacy_mode:
#endif
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
/* push an SRH on top of the current one */
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;
	int err = -EINVAL;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	err = seg6_do_srh_inline(skb, slwt->srh);
	if (err)
		goto drop;

	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return err;
}
/* encapsulate within an outer IPv6 header and a specified SRH */
static int input_action_end_b6_encap(struct sk_buff *skb,
				     struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;
	int err = -EINVAL;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	skb_reset_inner_headers(skb);
	skb->encapsulation = 1;

	err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
	if (err)
		goto drop;

	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return err;
}
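/* Illustrative usage, not taken from this file: End.B6 and End.B6.Encaps take
 * the SRH to be inserted/pushed via the srh attribute, e.g. (SIDs, segment
 * lists and device names are placeholders):
 *
 *	ip -6 route add 2001:db8::b6/128 encap seg6local action End.B6 \
 *		srh segs 2001:db8:2::1,2001:db8:3::1 dev eth0
 */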
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);

bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
{
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	struct ipv6_sr_hdr *srh = srh_state->srh;

	if (unlikely(srh == NULL))
		return false;

	if (unlikely(!srh_state->valid)) {
		if ((srh_state->hdrlen & 7) != 0)
			return false;

		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
		if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
			return false;

		srh_state->valid = true;
	}

	return true;
}
static int input_action_end_bpf(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	struct ipv6_sr_hdr *srh;
	int ret;

	srh = get_and_validate_srh(skb);
	if (!srh) {
		kfree_skb(skb);
		return -EINVAL;
	}
	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	/* preempt_disable is needed to protect the per-CPU buffer srh_state,
	 * which is also accessed by the bpf_lwt_seg6_* helpers
	 */
	preempt_disable();
	srh_state->srh = srh;
	srh_state->hdrlen = srh->hdrlen << 3;
	srh_state->valid = true;

	rcu_read_lock();
	bpf_compute_data_pointers(skb);
	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
	rcu_read_unlock();

	switch (ret) {
	case BPF_OK:
	case BPF_REDIRECT:
		break;
	case BPF_DROP:
		goto drop;
	default:
		pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
		goto drop;
	}

	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
		goto drop;

	preempt_enable();
	if (ret != BPF_REDIRECT)
		seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	preempt_enable();
	kfree_skb(skb);
	return -EINVAL;
}
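/* Illustrative usage, not taken from this file: End.BPF runs a program of
 * type BPF_PROG_TYPE_LWT_SEG6LOCAL on matching packets, e.g. (object file,
 * section name and addresses are placeholders):
 *
 *	ip -6 route add 2001:db8::bf/128 encap seg6local action End.BPF \
 *		endpoint obj prog.o section srv6 dev eth0
 */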
static struct seg6_action_desc seg6_action_table[] = {
	{
		.action		= SEG6_LOCAL_ACTION_END,
		.attrs		= 0,
		.input		= input_action_end,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_X,
		.attrs		= (1 << SEG6_LOCAL_NH6),
		.input		= input_action_end_x,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_T,
		.attrs		= (1 << SEG6_LOCAL_TABLE),
		.input		= input_action_end_t,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX2,
		.attrs		= (1 << SEG6_LOCAL_OIF),
		.input		= input_action_end_dx2,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX6,
		.attrs		= (1 << SEG6_LOCAL_NH6),
		.input		= input_action_end_dx6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX4,
		.attrs		= (1 << SEG6_LOCAL_NH4),
		.input		= input_action_end_dx4,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT4,
		.attrs		= (1 << SEG6_LOCAL_VRFTABLE),
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt4,
		.slwt_ops	= {
					.build_state = seg6_end_dt4_build,
				  },
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.attrs		= 0,
		.optattrs	= (1 << SEG6_LOCAL_TABLE) |
				  (1 << SEG6_LOCAL_VRFTABLE),
		.slwt_ops	= {
					.build_state = seg6_end_dt6_build,
				  },
#else
		.attrs		= (1 << SEG6_LOCAL_TABLE),
#endif
		.input		= input_action_end_dt6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6,
		.attrs		= (1 << SEG6_LOCAL_SRH),
		.input		= input_action_end_b6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6_ENCAP,
		.attrs		= (1 << SEG6_LOCAL_SRH),
		.input		= input_action_end_b6_encap,
		.static_headroom	= sizeof(struct ipv6hdr),
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_BPF,
		.attrs		= (1 << SEG6_LOCAL_BPF),
		.input		= input_action_end_bpf,
	},
};
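/* A purely hypothetical sketch (not in the original source): supporting one
 * more behavior would mean appending an entry of the same shape to
 * seg6_action_table and providing the matching input handler, e.g.:
 *
 *	{
 *		.action	= SEG6_LOCAL_ACTION_END_FOO,	// hypothetical ID
 *		.attrs	= (1 << SEG6_LOCAL_TABLE),
 *		.input	= input_action_end_foo,		// hypothetical handler
 *	},
 */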
static struct seg6_action_desc *__get_action_desc(int action)
{
	struct seg6_action_desc *desc;
	int i, count;

	count = ARRAY_SIZE(seg6_action_table);
	for (i = 0; i < count; i++) {
		desc = &seg6_action_table[i];
		if (desc->action == action)
			return desc;
	}

	return NULL;
}
static int seg6_local_input(struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct seg6_action_desc *desc;
	struct seg6_local_lwt *slwt;

	if (skb->protocol != htons(ETH_P_IPV6)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
	desc = slwt->desc;

	return desc->input(skb, slwt);
}
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_VRFTABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_NH4]	= { .type = NLA_BINARY,
				    .len = sizeof(struct in_addr) },
	[SEG6_LOCAL_NH6]	= { .type = NLA_BINARY,
				    .len = sizeof(struct in6_addr) },
	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
};
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;
	int len;

	srh = nla_data(attrs[SEG6_LOCAL_SRH]);
	len = nla_len(attrs[SEG6_LOCAL_SRH]);

	/* SRH must contain at least one segment */
	if (len < sizeof(*srh) + sizeof(struct in6_addr))
		return -EINVAL;

	if (!seg6_validate_srh(srh, len, false))
		return -EINVAL;

	slwt->srh = kmemdup(srh, len, GFP_KERNEL);
	if (!slwt->srh)
		return -ENOMEM;

	slwt->headroom += len;

	return 0;
}
*skb
, struct seg6_local_lwt
*slwt
)
1024 struct ipv6_sr_hdr
*srh
;
1029 len
= (srh
->hdrlen
+ 1) << 3;
1031 nla
= nla_reserve(skb
, SEG6_LOCAL_SRH
, len
);
1035 memcpy(nla_data(nla
), srh
, len
);
1040 static int cmp_nla_srh(struct seg6_local_lwt
*a
, struct seg6_local_lwt
*b
)
1042 int len
= (a
->srh
->hdrlen
+ 1) << 3;
1044 if (len
!= ((b
->srh
->hdrlen
+ 1) << 3))
1047 return memcmp(a
->srh
, b
->srh
, len
);
1050 static void destroy_attr_srh(struct seg6_local_lwt
*slwt
)
static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);

	return 0;
}

static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
		return -EMSGSIZE;

	return 0;
}

static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (a->table != b->table)
		return 1;

	return 0;
}
static struct
seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	return &slwt->dt_info;
#else
	return ERR_PTR(-EOPNOTSUPP);
#endif
}

static int parse_nla_vrftable(struct nlattr **attrs,
			      struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);

	if (IS_ERR(info))
		return PTR_ERR(info);

	info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);

	return 0;
}

static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);

	if (IS_ERR(info))
		return PTR_ERR(info);

	if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
		return -EMSGSIZE;

	return 0;
}

static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
	struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);

	if (info_a->vrf_table != info_b->vrf_table)
		return 1;

	return 0;
}
static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
	       sizeof(struct in_addr));

	return 0;
}

static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nla;

	nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
	if (!nla)
		return -EMSGSIZE;

	memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));

	return 0;
}

static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
}

static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
	       sizeof(struct in6_addr));

	return 0;
}

static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nla;

	nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
	if (!nla)
		return -EMSGSIZE;

	memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));

	return 0;
}

static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
}
static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);

	return 0;
}

static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
		return -EMSGSIZE;

	return 0;
}

static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (a->iif != b->iif)
		return 1;

	return 0;
}

static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);

	return 0;
}

static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
		return -EMSGSIZE;

	return 0;
}

static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (a->oif != b->oif)
		return 1;

	return 0;
}
#define MAX_PROG_NAME 256
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
	[SEG6_LOCAL_BPF_PROG]	   = { .type = NLA_U32, },
	[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
				       .len = MAX_PROG_NAME },
};
static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
	struct bpf_prog *p;
	int ret;
	u32 fd;

	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
					  attrs[SEG6_LOCAL_BPF],
					  bpf_prog_policy, NULL);
	if (ret < 0)
		return ret;

	if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
		return -EINVAL;

	slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
	if (!slwt->bpf.name)
		return -ENOMEM;

	fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
	if (IS_ERR(p)) {
		kfree(slwt->bpf.name);
		return PTR_ERR(p);
	}

	slwt->bpf.prog = p;
	return 0;
}
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nest;

	if (!slwt->bpf.prog)
		return 0;

	nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
		return -EMSGSIZE;

	if (slwt->bpf.name &&
	    nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
		return -EMSGSIZE;

	return nla_nest_end(skb, nest);
}

static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (!a->bpf.name && !b->bpf.name)
		return 0;

	if (!a->bpf.name || !b->bpf.name)
		return 1;

	return strcmp(a->bpf.name, b->bpf.name);
}

static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
{
	kfree(slwt->bpf.name);
	if (slwt->bpf.prog)
		bpf_prog_put(slwt->bpf.prog);
}
struct seg6_action_param {
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);

	/* optional destroy() callback useful for releasing resources which
	 * have been previously acquired in the corresponding parse()
	 * function.
	 */
	void (*destroy)(struct seg6_local_lwt *slwt);
};
static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_SRH]	= { .parse = parse_nla_srh,
				    .put = put_nla_srh,
				    .cmp = cmp_nla_srh,
				    .destroy = destroy_attr_srh },

	[SEG6_LOCAL_TABLE]	= { .parse = parse_nla_table,
				    .put = put_nla_table,
				    .cmp = cmp_nla_table },

	[SEG6_LOCAL_NH4]	= { .parse = parse_nla_nh4,
				    .put = put_nla_nh4,
				    .cmp = cmp_nla_nh4 },

	[SEG6_LOCAL_NH6]	= { .parse = parse_nla_nh6,
				    .put = put_nla_nh6,
				    .cmp = cmp_nla_nh6 },

	[SEG6_LOCAL_IIF]	= { .parse = parse_nla_iif,
				    .put = put_nla_iif,
				    .cmp = cmp_nla_iif },

	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
				    .put = put_nla_oif,
				    .cmp = cmp_nla_oif },

	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
				    .put = put_nla_bpf,
				    .cmp = cmp_nla_bpf,
				    .destroy = destroy_attr_bpf },

	[SEG6_LOCAL_VRFTABLE]	= { .parse = parse_nla_vrftable,
				    .put = put_nla_vrftable,
				    .cmp = cmp_nla_vrftable },
};
/* call the destroy() callback (if available) for each set attribute in
 * @parsed_attrs, starting from the first attribute up to the @max_parsed
 * (excluded) attribute.
 */
static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
			    struct seg6_local_lwt *slwt)
{
	struct seg6_action_param *param;
	int i;

	/* Every required seg6local attribute is identified by an ID which is
	 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask.
	 *
	 * We scan the 'parsed_attrs' bitmask, starting from the first attribute
	 * up to the @max_parsed (excluded) attribute.
	 * For each set attribute, we retrieve the corresponding destroy()
	 * callback. If the callback is not available, then we skip to the next
	 * attribute; otherwise, we call the destroy() callback.
	 */
	for (i = 0; i < max_parsed; ++i) {
		if (!(parsed_attrs & (1 << i)))
			continue;

		param = &seg6_action_params[i];

		if (param->destroy)
			param->destroy(slwt);
	}
}

/* release all the resources that may have been acquired during parsing
 * operations.
 */
static void destroy_attrs(struct seg6_local_lwt *slwt)
{
	unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;

	__destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
}
static int parse_nla_optional_attrs(struct nlattr **attrs,
				    struct seg6_local_lwt *slwt)
{
	struct seg6_action_desc *desc = slwt->desc;
	unsigned long parsed_optattrs = 0;
	struct seg6_action_param *param;
	int err, i;

	for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
		if (!(desc->optattrs & (1 << i)) || !attrs[i])
			continue;

		/* once here, the i-th attribute is provided by the
		 * userspace AND it is also identified as optional.
		 */
		param = &seg6_action_params[i];

		err = param->parse(attrs, slwt);
		if (err < 0)
			goto parse_optattrs_err;

		/* current attribute has been correctly parsed */
		parsed_optattrs |= (1 << i);
	}

	/* store in the tunnel state all the optional attributes successfully
	 * parsed.
	 */
	slwt->parsed_optattrs = parsed_optattrs;

	return 0;

parse_optattrs_err:
	__destroy_attrs(parsed_optattrs, i, slwt);

	return err;
}
/* call the custom constructor of the behavior during its initialization phase,
 * i.e. after all of its attributes have been parsed successfully.
 */
static int
seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
				struct netlink_ext_ack *extack)
{
	struct seg6_action_desc *desc = slwt->desc;
	struct seg6_local_lwtunnel_ops *ops;

	ops = &desc->slwt_ops;
	if (!ops->build_state)
		return 0;

	return ops->build_state(slwt, cfg, extack);
}

/* call the custom destructor of the behavior which is invoked before the
 * tunnel is going to be destroyed.
 */
static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
{
	struct seg6_action_desc *desc = slwt->desc;
	struct seg6_local_lwtunnel_ops *ops;

	ops = &desc->slwt_ops;
	if (!ops->destroy_state)
		return;

	ops->destroy_state(slwt);
}
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	struct seg6_action_param *param;
	struct seg6_action_desc *desc;
	unsigned long invalid_attrs;
	int i, err;

	desc = __get_action_desc(slwt->action);
	if (!desc)
		return -EINVAL;

	if (!desc->input)
		return -EOPNOTSUPP;

	slwt->desc = desc;
	slwt->headroom += desc->static_headroom;

	/* By forcing the desc->optattrs *set* and the desc->attrs *set* to be
	 * disjoint, we can release the resources acquired by optional
	 * attributes and by required attributes independently of each other,
	 * without any interference.
	 * In other words, we are sure that we do not release any of the
	 * acquired resources twice.
	 *
	 * Note that if an attribute is configured both as required and as
	 * optional, it means that the user has messed something up in the
	 * seg6_action_table. Therefore, this check is required for SRv6
	 * behaviors to work properly.
	 */
	invalid_attrs = desc->attrs & desc->optattrs;
	if (invalid_attrs) {
		WARN_ONCE(1,
			  "An attribute cannot be both required AND optional");
		return -EINVAL;
	}

	/* parse the required attributes */
	for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
		if (desc->attrs & (1 << i)) {
			if (!attrs[i])
				return -EINVAL;

			param = &seg6_action_params[i];

			err = param->parse(attrs, slwt);
			if (err < 0)
				goto parse_attrs_err;
		}
	}

	/* parse the optional attributes, if any */
	err = parse_nla_optional_attrs(attrs, slwt);
	if (err < 0)
		goto parse_attrs_err;

	return 0;

parse_attrs_err:
	/* release any resource that may have been acquired during the i-1
	 * parse() operations.
	 */
	__destroy_attrs(desc->attrs, i, slwt);

	return err;
}
static int seg6_local_build_state(struct net *net, struct nlattr *nla,
				  unsigned int family, const void *cfg,
				  struct lwtunnel_state **ts,
				  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[SEG6_LOCAL_MAX + 1];
	struct lwtunnel_state *newts;
	struct seg6_local_lwt *slwt;
	int err;

	if (family != AF_INET6)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
					  seg6_local_policy, extack);
	if (err < 0)
		return err;

	if (!tb[SEG6_LOCAL_ACTION])
		return -EINVAL;

	newts = lwtunnel_state_alloc(sizeof(*slwt));
	if (!newts)
		return -ENOMEM;

	slwt = seg6_local_lwtunnel(newts);
	slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);

	err = parse_nla_action(tb, slwt);
	if (err < 0)
		goto out_free;

	err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
	if (err < 0)
		goto out_destroy_attrs;

	newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
	newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
	newts->headroom = slwt->headroom;

	*ts = newts;

	return 0;

out_destroy_attrs:
	destroy_attrs(slwt);
out_free:
	kfree(newts);
	return err;
}

static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);

	seg6_local_lwtunnel_destroy_state(slwt);

	destroy_attrs(slwt);
}

static int seg6_local_fill_encap(struct sk_buff *skb,
				 struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
	struct seg6_action_param *param;
	unsigned long attrs;
	int i, err;

	if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
		return -EMSGSIZE;

	attrs = slwt->desc->attrs | slwt->parsed_optattrs;

	for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
		if (attrs & (1 << i)) {
			param = &seg6_action_params[i];
			err = param->put(skb, slwt);
			if (err < 0)
				return err;
		}
	}

	return 0;
}

static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
	unsigned long attrs;
	int nlsize;

	nlsize = nla_total_size(4); /* action */

	attrs = slwt->desc->attrs | slwt->parsed_optattrs;

	if (attrs & (1 << SEG6_LOCAL_SRH))
		nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);

	if (attrs & (1 << SEG6_LOCAL_TABLE))
		nlsize += nla_total_size(4);

	if (attrs & (1 << SEG6_LOCAL_NH4))
		nlsize += nla_total_size(4);

	if (attrs & (1 << SEG6_LOCAL_NH6))
		nlsize += nla_total_size(16);

	if (attrs & (1 << SEG6_LOCAL_IIF))
		nlsize += nla_total_size(4);

	if (attrs & (1 << SEG6_LOCAL_OIF))
		nlsize += nla_total_size(4);

	if (attrs & (1 << SEG6_LOCAL_BPF))
		nlsize += nla_total_size(sizeof(struct nlattr)) +
			  nla_total_size(MAX_PROG_NAME) +
			  nla_total_size(4);

	if (attrs & (1 << SEG6_LOCAL_VRFTABLE))
		nlsize += nla_total_size(4);

	return nlsize;
}

static int seg6_local_cmp_encap(struct lwtunnel_state *a,
				struct lwtunnel_state *b)
{
	struct seg6_local_lwt *slwt_a, *slwt_b;
	struct seg6_action_param *param;
	unsigned long attrs_a, attrs_b;
	int i;

	slwt_a = seg6_local_lwtunnel(a);
	slwt_b = seg6_local_lwtunnel(b);

	if (slwt_a->action != slwt_b->action)
		return 1;

	attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
	attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;

	if (attrs_a != attrs_b)
		return 1;

	for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
		if (attrs_a & (1 << i)) {
			param = &seg6_action_params[i];
			if (param->cmp(slwt_a, slwt_b))
				return 1;
		}
	}

	return 0;
}

static const struct lwtunnel_encap_ops seg6_local_ops = {
	.build_state	= seg6_local_build_state,
	.destroy_state	= seg6_local_destroy_state,
	.input		= seg6_local_input,
	.fill_encap	= seg6_local_fill_encap,
	.get_encap_size	= seg6_local_get_encap_size,
	.cmp_encap	= seg6_local_cmp_encap,
	.owner		= THIS_MODULE,
};

int __init seg6_local_init(void)
{
	return lwtunnel_encap_add_ops(&seg6_local_ops,
				      LWTUNNEL_ENCAP_SEG6_LOCAL);
}

void seg6_local_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
}