1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3 #include <linux/errno.h>
4 #include <linux/socket.h>
5 #include <linux/skbuff.h>
7 #include <linux/icmp.h>
9 #include <linux/types.h>
10 #include <linux/kernel.h>
11 #include <net/genetlink.h>
15 #include <net/protocol.h>
17 #include <net/udp_tunnel.h>
19 #include <uapi/linux/fou.h>
20 #include <uapi/linux/genetlink.h>
29 struct list_head list
;
33 #define FOU_F_REMCSUM_NOPARTIAL BIT(0)
39 struct udp_port_cfg udp_config
;
42 static unsigned int fou_net_id
;
45 struct list_head fou_list
;
46 struct mutex fou_lock
;
49 static inline struct fou
*fou_from_sock(struct sock
*sk
)
51 return sk
->sk_user_data
;
54 static int fou_recv_pull(struct sk_buff
*skb
, struct fou
*fou
, size_t len
)
56 /* Remove 'len' bytes from the packet (UDP header and
57 * FOU header if present).
59 if (fou
->family
== AF_INET
)
60 ip_hdr(skb
)->tot_len
= htons(ntohs(ip_hdr(skb
)->tot_len
) - len
);
62 ipv6_hdr(skb
)->payload_len
=
63 htons(ntohs(ipv6_hdr(skb
)->payload_len
) - len
);
66 skb_postpull_rcsum(skb
, udp_hdr(skb
), len
);
67 skb_reset_transport_header(skb
);
68 return iptunnel_pull_offloads(skb
);
71 static int fou_udp_recv(struct sock
*sk
, struct sk_buff
*skb
)
73 struct fou
*fou
= fou_from_sock(sk
);
78 if (fou_recv_pull(skb
, fou
, sizeof(struct udphdr
)))
81 return -fou
->protocol
;
88 static struct guehdr
*gue_remcsum(struct sk_buff
*skb
, struct guehdr
*guehdr
,
89 void *data
, size_t hdrlen
, u8 ipproto
,
93 size_t start
= ntohs(pd
[0]);
94 size_t offset
= ntohs(pd
[1]);
95 size_t plen
= sizeof(struct udphdr
) + hdrlen
+
96 max_t(size_t, offset
+ sizeof(u16
), start
);
98 if (skb
->remcsum_offload
)
101 if (!pskb_may_pull(skb
, plen
))
103 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
105 skb_remcsum_process(skb
, (void *)guehdr
+ hdrlen
,
106 start
, offset
, nopartial
);
111 static int gue_control_message(struct sk_buff
*skb
, struct guehdr
*guehdr
)
118 static int gue_udp_recv(struct sock
*sk
, struct sk_buff
*skb
)
120 struct fou
*fou
= fou_from_sock(sk
);
121 size_t len
, optlen
, hdrlen
;
122 struct guehdr
*guehdr
;
130 len
= sizeof(struct udphdr
) + sizeof(struct guehdr
);
131 if (!pskb_may_pull(skb
, len
))
134 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
136 switch (guehdr
->version
) {
137 case 0: /* Full GUE header present */
141 /* Direct encapsulation of IPv4 or IPv6 */
145 switch (((struct iphdr
*)guehdr
)->version
) {
156 if (fou_recv_pull(skb
, fou
, sizeof(struct udphdr
)))
162 default: /* Undefined version */
166 optlen
= guehdr
->hlen
<< 2;
169 if (!pskb_may_pull(skb
, len
))
172 /* guehdr may change after pull */
173 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
175 if (validate_gue_flags(guehdr
, optlen
))
178 hdrlen
= sizeof(struct guehdr
) + optlen
;
180 if (fou
->family
== AF_INET
)
181 ip_hdr(skb
)->tot_len
= htons(ntohs(ip_hdr(skb
)->tot_len
) - len
);
183 ipv6_hdr(skb
)->payload_len
=
184 htons(ntohs(ipv6_hdr(skb
)->payload_len
) - len
);
186 /* Pull csum through the guehdr now. This can be used if
187 * there is a remote checksum offload.
189 skb_postpull_rcsum(skb
, udp_hdr(skb
), len
);
193 if (guehdr
->flags
& GUE_FLAG_PRIV
) {
194 __be32 flags
= *(__be32
*)(data
+ doffset
);
196 doffset
+= GUE_LEN_PRIV
;
198 if (flags
& GUE_PFLAG_REMCSUM
) {
199 guehdr
= gue_remcsum(skb
, guehdr
, data
+ doffset
,
200 hdrlen
, guehdr
->proto_ctype
,
202 FOU_F_REMCSUM_NOPARTIAL
));
208 doffset
+= GUE_PLEN_REMCSUM
;
212 if (unlikely(guehdr
->control
))
213 return gue_control_message(skb
, guehdr
);
215 proto_ctype
= guehdr
->proto_ctype
;
216 __skb_pull(skb
, sizeof(struct udphdr
) + hdrlen
);
217 skb_reset_transport_header(skb
);
219 if (iptunnel_pull_offloads(skb
))
229 static struct sk_buff
*fou_gro_receive(struct sock
*sk
,
230 struct list_head
*head
,
233 u8 proto
= fou_from_sock(sk
)->protocol
;
234 const struct net_offload
**offloads
;
235 const struct net_offload
*ops
;
236 struct sk_buff
*pp
= NULL
;
238 /* We can clear the encap_mark for FOU as we are essentially doing
239 * one of two possible things. We are either adding an L4 tunnel
240 * header to the outer L3 tunnel header, or we are simply
241 * treating the GRE tunnel header as though it is a UDP protocol
242 * specific header such as VXLAN or GENEVE.
244 NAPI_GRO_CB(skb
)->encap_mark
= 0;
246 /* Flag this frame as already having an outer encap header */
247 NAPI_GRO_CB(skb
)->is_fou
= 1;
250 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
251 ops
= rcu_dereference(offloads
[proto
]);
252 if (!ops
|| !ops
->callbacks
.gro_receive
)
255 pp
= call_gro_receive(ops
->callbacks
.gro_receive
, head
, skb
);
263 static int fou_gro_complete(struct sock
*sk
, struct sk_buff
*skb
,
266 const struct net_offload
*ops
;
267 u8 proto
= fou_from_sock(sk
)->protocol
;
269 const struct net_offload
**offloads
;
272 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
273 ops
= rcu_dereference(offloads
[proto
]);
274 if (WARN_ON(!ops
|| !ops
->callbacks
.gro_complete
))
277 err
= ops
->callbacks
.gro_complete(skb
, nhoff
);
279 skb_set_inner_mac_header(skb
, nhoff
);
287 static struct guehdr
*gue_gro_remcsum(struct sk_buff
*skb
, unsigned int off
,
288 struct guehdr
*guehdr
, void *data
,
289 size_t hdrlen
, struct gro_remcsum
*grc
,
293 size_t start
= ntohs(pd
[0]);
294 size_t offset
= ntohs(pd
[1]);
296 if (skb
->remcsum_offload
)
299 if (!NAPI_GRO_CB(skb
)->csum_valid
)
302 guehdr
= skb_gro_remcsum_process(skb
, (void *)guehdr
, off
, hdrlen
,
303 start
, offset
, grc
, nopartial
);
305 skb
->remcsum_offload
= 1;
310 static struct sk_buff
*gue_gro_receive(struct sock
*sk
,
311 struct list_head
*head
,
314 const struct net_offload
**offloads
;
315 const struct net_offload
*ops
;
316 struct sk_buff
*pp
= NULL
;
318 struct guehdr
*guehdr
;
319 size_t len
, optlen
, hdrlen
, off
;
323 struct fou
*fou
= fou_from_sock(sk
);
324 struct gro_remcsum grc
;
327 skb_gro_remcsum_init(&grc
);
329 off
= skb_gro_offset(skb
);
330 len
= off
+ sizeof(*guehdr
);
332 guehdr
= skb_gro_header_fast(skb
, off
);
333 if (skb_gro_header_hard(skb
, len
)) {
334 guehdr
= skb_gro_header_slow(skb
, len
, off
);
335 if (unlikely(!guehdr
))
339 switch (guehdr
->version
) {
343 switch (((struct iphdr
*)guehdr
)->version
) {
345 proto
= IPPROTO_IPIP
;
348 proto
= IPPROTO_IPV6
;
358 optlen
= guehdr
->hlen
<< 2;
361 if (skb_gro_header_hard(skb
, len
)) {
362 guehdr
= skb_gro_header_slow(skb
, len
, off
);
363 if (unlikely(!guehdr
))
367 if (unlikely(guehdr
->control
) || guehdr
->version
!= 0 ||
368 validate_gue_flags(guehdr
, optlen
))
371 hdrlen
= sizeof(*guehdr
) + optlen
;
373 /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
374 * this is needed if there is a remote checksum offload.
376 skb_gro_postpull_rcsum(skb
, guehdr
, hdrlen
);
380 if (guehdr
->flags
& GUE_FLAG_PRIV
) {
381 __be32 flags
= *(__be32
*)(data
+ doffset
);
383 doffset
+= GUE_LEN_PRIV
;
385 if (flags
& GUE_PFLAG_REMCSUM
) {
386 guehdr
= gue_gro_remcsum(skb
, off
, guehdr
,
387 data
+ doffset
, hdrlen
, &grc
,
389 FOU_F_REMCSUM_NOPARTIAL
));
396 doffset
+= GUE_PLEN_REMCSUM
;
400 skb_gro_pull(skb
, hdrlen
);
402 list_for_each_entry(p
, head
, list
) {
403 const struct guehdr
*guehdr2
;
405 if (!NAPI_GRO_CB(p
)->same_flow
)
408 guehdr2
= (struct guehdr
*)(p
->data
+ off
);
410 /* Compare base GUE header to be equal (covers
411 * hlen, version, proto_ctype, and flags).
413 if (guehdr
->word
!= guehdr2
->word
) {
414 NAPI_GRO_CB(p
)->same_flow
= 0;
418 /* Check that the optional fields are the same. */
419 if (guehdr
->hlen
&& memcmp(&guehdr
[1], &guehdr2
[1],
420 guehdr
->hlen
<< 2)) {
421 NAPI_GRO_CB(p
)->same_flow
= 0;
426 proto
= guehdr
->proto_ctype
;
430 /* We can clear the encap_mark for GUE as we are essentially doing
431 * one of two possible things. We are either adding an L4 tunnel
432 * header to the outer L3 tunnel header, or we are simply
433 * treating the GRE tunnel header as though it is a UDP protocol
434 * specific header such as VXLAN or GENEVE.
436 NAPI_GRO_CB(skb
)->encap_mark
= 0;
438 /* Flag this frame as already having an outer encap header */
439 NAPI_GRO_CB(skb
)->is_fou
= 1;
442 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
443 ops
= rcu_dereference(offloads
[proto
]);
444 if (WARN_ON_ONCE(!ops
|| !ops
->callbacks
.gro_receive
))
447 pp
= call_gro_receive(ops
->callbacks
.gro_receive
, head
, skb
);
453 skb_gro_flush_final_remcsum(skb
, pp
, flush
, &grc
);
458 static int gue_gro_complete(struct sock
*sk
, struct sk_buff
*skb
, int nhoff
)
460 const struct net_offload
**offloads
;
461 struct guehdr
*guehdr
= (struct guehdr
*)(skb
->data
+ nhoff
);
462 const struct net_offload
*ops
;
463 unsigned int guehlen
= 0;
467 switch (guehdr
->version
) {
469 proto
= guehdr
->proto_ctype
;
470 guehlen
= sizeof(*guehdr
) + (guehdr
->hlen
<< 2);
473 switch (((struct iphdr
*)guehdr
)->version
) {
475 proto
= IPPROTO_IPIP
;
478 proto
= IPPROTO_IPV6
;
489 offloads
= NAPI_GRO_CB(skb
)->is_ipv6
? inet6_offloads
: inet_offloads
;
490 ops
= rcu_dereference(offloads
[proto
]);
491 if (WARN_ON(!ops
|| !ops
->callbacks
.gro_complete
))
494 err
= ops
->callbacks
.gro_complete(skb
, nhoff
+ guehlen
);
496 skb_set_inner_mac_header(skb
, nhoff
+ guehlen
);
503 static bool fou_cfg_cmp(struct fou
*fou
, struct fou_cfg
*cfg
)
505 struct sock
*sk
= fou
->sock
->sk
;
506 struct udp_port_cfg
*udp_cfg
= &cfg
->udp_config
;
508 if (fou
->family
!= udp_cfg
->family
||
509 fou
->port
!= udp_cfg
->local_udp_port
||
510 sk
->sk_dport
!= udp_cfg
->peer_udp_port
||
511 sk
->sk_bound_dev_if
!= udp_cfg
->bind_ifindex
)
514 if (fou
->family
== AF_INET
) {
515 if (sk
->sk_rcv_saddr
!= udp_cfg
->local_ip
.s_addr
||
516 sk
->sk_daddr
!= udp_cfg
->peer_ip
.s_addr
)
520 #if IS_ENABLED(CONFIG_IPV6)
522 if (ipv6_addr_cmp(&sk
->sk_v6_rcv_saddr
, &udp_cfg
->local_ip6
) ||
523 ipv6_addr_cmp(&sk
->sk_v6_daddr
, &udp_cfg
->peer_ip6
))
533 static int fou_add_to_port_list(struct net
*net
, struct fou
*fou
,
536 struct fou_net
*fn
= net_generic(net
, fou_net_id
);
539 mutex_lock(&fn
->fou_lock
);
540 list_for_each_entry(fout
, &fn
->fou_list
, list
) {
541 if (fou_cfg_cmp(fout
, cfg
)) {
542 mutex_unlock(&fn
->fou_lock
);
547 list_add(&fou
->list
, &fn
->fou_list
);
548 mutex_unlock(&fn
->fou_lock
);
553 static void fou_release(struct fou
*fou
)
555 struct socket
*sock
= fou
->sock
;
557 list_del(&fou
->list
);
558 udp_tunnel_sock_release(sock
);
563 static int fou_create(struct net
*net
, struct fou_cfg
*cfg
,
564 struct socket
**sockp
)
566 struct socket
*sock
= NULL
;
567 struct fou
*fou
= NULL
;
569 struct udp_tunnel_sock_cfg tunnel_cfg
;
572 /* Open UDP socket */
573 err
= udp_sock_create(net
, &cfg
->udp_config
, &sock
);
577 /* Allocate FOU port structure */
578 fou
= kzalloc(sizeof(*fou
), GFP_KERNEL
);
586 fou
->port
= cfg
->udp_config
.local_udp_port
;
587 fou
->family
= cfg
->udp_config
.family
;
588 fou
->flags
= cfg
->flags
;
589 fou
->type
= cfg
->type
;
592 memset(&tunnel_cfg
, 0, sizeof(tunnel_cfg
));
593 tunnel_cfg
.encap_type
= 1;
594 tunnel_cfg
.sk_user_data
= fou
;
595 tunnel_cfg
.encap_destroy
= NULL
;
597 /* Initialize according to the fou type */
599 case FOU_ENCAP_DIRECT
:
600 tunnel_cfg
.encap_rcv
= fou_udp_recv
;
601 tunnel_cfg
.gro_receive
= fou_gro_receive
;
602 tunnel_cfg
.gro_complete
= fou_gro_complete
;
603 fou
->protocol
= cfg
->protocol
;
606 tunnel_cfg
.encap_rcv
= gue_udp_recv
;
607 tunnel_cfg
.gro_receive
= gue_gro_receive
;
608 tunnel_cfg
.gro_complete
= gue_gro_complete
;
615 setup_udp_tunnel_sock(net
, sock
, &tunnel_cfg
);
617 sk
->sk_allocation
= GFP_ATOMIC
;
619 err
= fou_add_to_port_list(net
, fou
, cfg
);
631 udp_tunnel_sock_release(sock
);
636 static int fou_destroy(struct net
*net
, struct fou_cfg
*cfg
)
638 struct fou_net
*fn
= net_generic(net
, fou_net_id
);
642 mutex_lock(&fn
->fou_lock
);
643 list_for_each_entry(fou
, &fn
->fou_list
, list
) {
644 if (fou_cfg_cmp(fou
, cfg
)) {
650 mutex_unlock(&fn
->fou_lock
);
655 static struct genl_family fou_nl_family
;
657 static const struct nla_policy fou_nl_policy
[FOU_ATTR_MAX
+ 1] = {
658 [FOU_ATTR_PORT
] = { .type
= NLA_U16
, },
659 [FOU_ATTR_AF
] = { .type
= NLA_U8
, },
660 [FOU_ATTR_IPPROTO
] = { .type
= NLA_U8
, },
661 [FOU_ATTR_TYPE
] = { .type
= NLA_U8
, },
662 [FOU_ATTR_REMCSUM_NOPARTIAL
] = { .type
= NLA_FLAG
, },
663 [FOU_ATTR_LOCAL_V4
] = { .type
= NLA_U32
, },
664 [FOU_ATTR_PEER_V4
] = { .type
= NLA_U32
, },
665 [FOU_ATTR_LOCAL_V6
] = { .type
= sizeof(struct in6_addr
), },
666 [FOU_ATTR_PEER_V6
] = { .type
= sizeof(struct in6_addr
), },
667 [FOU_ATTR_PEER_PORT
] = { .type
= NLA_U16
, },
668 [FOU_ATTR_IFINDEX
] = { .type
= NLA_S32
, },
671 static int parse_nl_config(struct genl_info
*info
,
674 bool has_local
= false, has_peer
= false;
679 memset(cfg
, 0, sizeof(*cfg
));
681 cfg
->udp_config
.family
= AF_INET
;
683 if (info
->attrs
[FOU_ATTR_AF
]) {
684 u8 family
= nla_get_u8(info
->attrs
[FOU_ATTR_AF
]);
690 cfg
->udp_config
.ipv6_v6only
= 1;
693 return -EAFNOSUPPORT
;
696 cfg
->udp_config
.family
= family
;
699 if (info
->attrs
[FOU_ATTR_PORT
]) {
700 port
= nla_get_be16(info
->attrs
[FOU_ATTR_PORT
]);
701 cfg
->udp_config
.local_udp_port
= port
;
704 if (info
->attrs
[FOU_ATTR_IPPROTO
])
705 cfg
->protocol
= nla_get_u8(info
->attrs
[FOU_ATTR_IPPROTO
]);
707 if (info
->attrs
[FOU_ATTR_TYPE
])
708 cfg
->type
= nla_get_u8(info
->attrs
[FOU_ATTR_TYPE
]);
710 if (info
->attrs
[FOU_ATTR_REMCSUM_NOPARTIAL
])
711 cfg
->flags
|= FOU_F_REMCSUM_NOPARTIAL
;
713 if (cfg
->udp_config
.family
== AF_INET
) {
714 if (info
->attrs
[FOU_ATTR_LOCAL_V4
]) {
715 attr
= info
->attrs
[FOU_ATTR_LOCAL_V4
];
716 cfg
->udp_config
.local_ip
.s_addr
= nla_get_in_addr(attr
);
720 if (info
->attrs
[FOU_ATTR_PEER_V4
]) {
721 attr
= info
->attrs
[FOU_ATTR_PEER_V4
];
722 cfg
->udp_config
.peer_ip
.s_addr
= nla_get_in_addr(attr
);
725 #if IS_ENABLED(CONFIG_IPV6)
727 if (info
->attrs
[FOU_ATTR_LOCAL_V6
]) {
728 attr
= info
->attrs
[FOU_ATTR_LOCAL_V6
];
729 cfg
->udp_config
.local_ip6
= nla_get_in6_addr(attr
);
733 if (info
->attrs
[FOU_ATTR_PEER_V6
]) {
734 attr
= info
->attrs
[FOU_ATTR_PEER_V6
];
735 cfg
->udp_config
.peer_ip6
= nla_get_in6_addr(attr
);
742 if (info
->attrs
[FOU_ATTR_PEER_PORT
]) {
743 port
= nla_get_be16(info
->attrs
[FOU_ATTR_PEER_PORT
]);
744 cfg
->udp_config
.peer_udp_port
= port
;
750 if (info
->attrs
[FOU_ATTR_IFINDEX
]) {
754 ifindex
= nla_get_s32(info
->attrs
[FOU_ATTR_IFINDEX
]);
756 cfg
->udp_config
.bind_ifindex
= ifindex
;
762 static int fou_nl_cmd_add_port(struct sk_buff
*skb
, struct genl_info
*info
)
764 struct net
*net
= genl_info_net(info
);
768 err
= parse_nl_config(info
, &cfg
);
772 return fou_create(net
, &cfg
, NULL
);
775 static int fou_nl_cmd_rm_port(struct sk_buff
*skb
, struct genl_info
*info
)
777 struct net
*net
= genl_info_net(info
);
781 err
= parse_nl_config(info
, &cfg
);
785 return fou_destroy(net
, &cfg
);
788 static int fou_fill_info(struct fou
*fou
, struct sk_buff
*msg
)
790 struct sock
*sk
= fou
->sock
->sk
;
792 if (nla_put_u8(msg
, FOU_ATTR_AF
, fou
->sock
->sk
->sk_family
) ||
793 nla_put_be16(msg
, FOU_ATTR_PORT
, fou
->port
) ||
794 nla_put_be16(msg
, FOU_ATTR_PEER_PORT
, sk
->sk_dport
) ||
795 nla_put_u8(msg
, FOU_ATTR_IPPROTO
, fou
->protocol
) ||
796 nla_put_u8(msg
, FOU_ATTR_TYPE
, fou
->type
) ||
797 nla_put_s32(msg
, FOU_ATTR_IFINDEX
, sk
->sk_bound_dev_if
))
800 if (fou
->flags
& FOU_F_REMCSUM_NOPARTIAL
)
801 if (nla_put_flag(msg
, FOU_ATTR_REMCSUM_NOPARTIAL
))
804 if (fou
->sock
->sk
->sk_family
== AF_INET
) {
805 if (nla_put_in_addr(msg
, FOU_ATTR_LOCAL_V4
, sk
->sk_rcv_saddr
))
808 if (nla_put_in_addr(msg
, FOU_ATTR_PEER_V4
, sk
->sk_daddr
))
810 #if IS_ENABLED(CONFIG_IPV6)
812 if (nla_put_in6_addr(msg
, FOU_ATTR_LOCAL_V6
,
813 &sk
->sk_v6_rcv_saddr
))
816 if (nla_put_in6_addr(msg
, FOU_ATTR_PEER_V6
, &sk
->sk_v6_daddr
))
824 static int fou_dump_info(struct fou
*fou
, u32 portid
, u32 seq
,
825 u32 flags
, struct sk_buff
*skb
, u8 cmd
)
829 hdr
= genlmsg_put(skb
, portid
, seq
, &fou_nl_family
, flags
, cmd
);
833 if (fou_fill_info(fou
, skb
) < 0)
834 goto nla_put_failure
;
836 genlmsg_end(skb
, hdr
);
840 genlmsg_cancel(skb
, hdr
);
844 static int fou_nl_cmd_get_port(struct sk_buff
*skb
, struct genl_info
*info
)
846 struct net
*net
= genl_info_net(info
);
847 struct fou_net
*fn
= net_generic(net
, fou_net_id
);
855 ret
= parse_nl_config(info
, &cfg
);
858 port
= cfg
.udp_config
.local_udp_port
;
862 family
= cfg
.udp_config
.family
;
863 if (family
!= AF_INET
&& family
!= AF_INET6
)
866 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
871 mutex_lock(&fn
->fou_lock
);
872 list_for_each_entry(fout
, &fn
->fou_list
, list
) {
873 if (fou_cfg_cmp(fout
, &cfg
)) {
874 ret
= fou_dump_info(fout
, info
->snd_portid
,
875 info
->snd_seq
, 0, msg
,
880 mutex_unlock(&fn
->fou_lock
);
884 return genlmsg_reply(msg
, info
);
891 static int fou_nl_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
893 struct net
*net
= sock_net(skb
->sk
);
894 struct fou_net
*fn
= net_generic(net
, fou_net_id
);
898 mutex_lock(&fn
->fou_lock
);
899 list_for_each_entry(fout
, &fn
->fou_list
, list
) {
900 if (idx
++ < cb
->args
[0])
902 ret
= fou_dump_info(fout
, NETLINK_CB(cb
->skb
).portid
,
903 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
908 mutex_unlock(&fn
->fou_lock
);
914 static const struct genl_ops fou_nl_ops
[] = {
917 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
918 .doit
= fou_nl_cmd_add_port
,
919 .flags
= GENL_ADMIN_PERM
,
923 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
924 .doit
= fou_nl_cmd_rm_port
,
925 .flags
= GENL_ADMIN_PERM
,
929 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
930 .doit
= fou_nl_cmd_get_port
,
931 .dumpit
= fou_nl_dump
,
935 static struct genl_family fou_nl_family __ro_after_init
= {
937 .name
= FOU_GENL_NAME
,
938 .version
= FOU_GENL_VERSION
,
939 .maxattr
= FOU_ATTR_MAX
,
940 .policy
= fou_nl_policy
,
942 .module
= THIS_MODULE
,
944 .n_ops
= ARRAY_SIZE(fou_nl_ops
),
947 size_t fou_encap_hlen(struct ip_tunnel_encap
*e
)
949 return sizeof(struct udphdr
);
951 EXPORT_SYMBOL(fou_encap_hlen
);
953 size_t gue_encap_hlen(struct ip_tunnel_encap
*e
)
956 bool need_priv
= false;
958 len
= sizeof(struct udphdr
) + sizeof(struct guehdr
);
960 if (e
->flags
& TUNNEL_ENCAP_FLAG_REMCSUM
) {
961 len
+= GUE_PLEN_REMCSUM
;
965 len
+= need_priv
? GUE_LEN_PRIV
: 0;
969 EXPORT_SYMBOL(gue_encap_hlen
);
971 int __fou_build_header(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
972 u8
*protocol
, __be16
*sport
, int type
)
976 err
= iptunnel_handle_offloads(skb
, type
);
980 *sport
= e
->sport
? : udp_flow_src_port(dev_net(skb
->dev
),
985 EXPORT_SYMBOL(__fou_build_header
);
987 int __gue_build_header(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
988 u8
*protocol
, __be16
*sport
, int type
)
990 struct guehdr
*guehdr
;
991 size_t hdrlen
, optlen
= 0;
993 bool need_priv
= false;
996 if ((e
->flags
& TUNNEL_ENCAP_FLAG_REMCSUM
) &&
997 skb
->ip_summed
== CHECKSUM_PARTIAL
) {
998 optlen
+= GUE_PLEN_REMCSUM
;
999 type
|= SKB_GSO_TUNNEL_REMCSUM
;
1003 optlen
+= need_priv
? GUE_LEN_PRIV
: 0;
1005 err
= iptunnel_handle_offloads(skb
, type
);
1009 /* Get source port (based on flow hash) before skb_push */
1010 *sport
= e
->sport
? : udp_flow_src_port(dev_net(skb
->dev
),
1013 hdrlen
= sizeof(struct guehdr
) + optlen
;
1015 skb_push(skb
, hdrlen
);
1017 guehdr
= (struct guehdr
*)skb
->data
;
1019 guehdr
->control
= 0;
1020 guehdr
->version
= 0;
1021 guehdr
->hlen
= optlen
>> 2;
1023 guehdr
->proto_ctype
= *protocol
;
1028 __be32
*flags
= data
;
1030 guehdr
->flags
|= GUE_FLAG_PRIV
;
1032 data
+= GUE_LEN_PRIV
;
1034 if (type
& SKB_GSO_TUNNEL_REMCSUM
) {
1035 u16 csum_start
= skb_checksum_start_offset(skb
);
1038 if (csum_start
< hdrlen
)
1041 csum_start
-= hdrlen
;
1042 pd
[0] = htons(csum_start
);
1043 pd
[1] = htons(csum_start
+ skb
->csum_offset
);
1045 if (!skb_is_gso(skb
)) {
1046 skb
->ip_summed
= CHECKSUM_NONE
;
1047 skb
->encapsulation
= 0;
1050 *flags
|= GUE_PFLAG_REMCSUM
;
1051 data
+= GUE_PLEN_REMCSUM
;
1058 EXPORT_SYMBOL(__gue_build_header
);
1060 #ifdef CONFIG_NET_FOU_IP_TUNNELS
1062 static void fou_build_udp(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
1063 struct flowi4
*fl4
, u8
*protocol
, __be16 sport
)
1067 skb_push(skb
, sizeof(struct udphdr
));
1068 skb_reset_transport_header(skb
);
1072 uh
->dest
= e
->dport
;
1074 uh
->len
= htons(skb
->len
);
1075 udp_set_csum(!(e
->flags
& TUNNEL_ENCAP_FLAG_CSUM
), skb
,
1076 fl4
->saddr
, fl4
->daddr
, skb
->len
);
1078 *protocol
= IPPROTO_UDP
;
1081 static int fou_build_header(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
1082 u8
*protocol
, struct flowi4
*fl4
)
1084 int type
= e
->flags
& TUNNEL_ENCAP_FLAG_CSUM
? SKB_GSO_UDP_TUNNEL_CSUM
:
1089 err
= __fou_build_header(skb
, e
, protocol
, &sport
, type
);
1093 fou_build_udp(skb
, e
, fl4
, protocol
, sport
);
1098 static int gue_build_header(struct sk_buff
*skb
, struct ip_tunnel_encap
*e
,
1099 u8
*protocol
, struct flowi4
*fl4
)
1101 int type
= e
->flags
& TUNNEL_ENCAP_FLAG_CSUM
? SKB_GSO_UDP_TUNNEL_CSUM
:
1106 err
= __gue_build_header(skb
, e
, protocol
, &sport
, type
);
1110 fou_build_udp(skb
, e
, fl4
, protocol
, sport
);
1115 static int gue_err_proto_handler(int proto
, struct sk_buff
*skb
, u32 info
)
1117 const struct net_protocol
*ipprot
= rcu_dereference(inet_protos
[proto
]);
1119 if (ipprot
&& ipprot
->err_handler
) {
1120 if (!ipprot
->err_handler(skb
, info
))
1127 static int gue_err(struct sk_buff
*skb
, u32 info
)
1129 int transport_offset
= skb_transport_offset(skb
);
1130 struct guehdr
*guehdr
;
1134 len
= sizeof(struct udphdr
) + sizeof(struct guehdr
);
1135 if (!pskb_may_pull(skb
, transport_offset
+ len
))
1138 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
1140 switch (guehdr
->version
) {
1141 case 0: /* Full GUE header present */
1144 /* Direct encapsulation of IPv4 or IPv6 */
1145 skb_set_transport_header(skb
, -(int)sizeof(struct icmphdr
));
1147 switch (((struct iphdr
*)guehdr
)->version
) {
1149 ret
= gue_err_proto_handler(IPPROTO_IPIP
, skb
, info
);
1151 #if IS_ENABLED(CONFIG_IPV6)
1153 ret
= gue_err_proto_handler(IPPROTO_IPV6
, skb
, info
);
1161 default: /* Undefined version */
1165 if (guehdr
->control
)
1168 optlen
= guehdr
->hlen
<< 2;
1170 if (!pskb_may_pull(skb
, transport_offset
+ len
+ optlen
))
1173 guehdr
= (struct guehdr
*)&udp_hdr(skb
)[1];
1174 if (validate_gue_flags(guehdr
, optlen
))
1177 /* Handling exceptions for direct UDP encapsulation in GUE would lead to
1178 * recursion. Besides, this kind of encapsulation can't even be
1179 * configured currently. Discard this.
1181 if (guehdr
->proto_ctype
== IPPROTO_UDP
||
1182 guehdr
->proto_ctype
== IPPROTO_UDPLITE
)
1185 skb_set_transport_header(skb
, -(int)sizeof(struct icmphdr
));
1186 ret
= gue_err_proto_handler(guehdr
->proto_ctype
, skb
, info
);
1189 skb_set_transport_header(skb
, transport_offset
);
1194 static const struct ip_tunnel_encap_ops fou_iptun_ops
= {
1195 .encap_hlen
= fou_encap_hlen
,
1196 .build_header
= fou_build_header
,
1197 .err_handler
= gue_err
,
1200 static const struct ip_tunnel_encap_ops gue_iptun_ops
= {
1201 .encap_hlen
= gue_encap_hlen
,
1202 .build_header
= gue_build_header
,
1203 .err_handler
= gue_err
,
1206 static int ip_tunnel_encap_add_fou_ops(void)
1210 ret
= ip_tunnel_encap_add_ops(&fou_iptun_ops
, TUNNEL_ENCAP_FOU
);
1212 pr_err("can't add fou ops\n");
1216 ret
= ip_tunnel_encap_add_ops(&gue_iptun_ops
, TUNNEL_ENCAP_GUE
);
1218 pr_err("can't add gue ops\n");
1219 ip_tunnel_encap_del_ops(&fou_iptun_ops
, TUNNEL_ENCAP_FOU
);
1226 static void ip_tunnel_encap_del_fou_ops(void)
1228 ip_tunnel_encap_del_ops(&fou_iptun_ops
, TUNNEL_ENCAP_FOU
);
1229 ip_tunnel_encap_del_ops(&gue_iptun_ops
, TUNNEL_ENCAP_GUE
);
1234 static int ip_tunnel_encap_add_fou_ops(void)
1239 static void ip_tunnel_encap_del_fou_ops(void)
1245 static __net_init
int fou_init_net(struct net
*net
)
1247 struct fou_net
*fn
= net_generic(net
, fou_net_id
);
1249 INIT_LIST_HEAD(&fn
->fou_list
);
1250 mutex_init(&fn
->fou_lock
);
1254 static __net_exit
void fou_exit_net(struct net
*net
)
1256 struct fou_net
*fn
= net_generic(net
, fou_net_id
);
1257 struct fou
*fou
, *next
;
1259 /* Close all the FOU sockets */
1260 mutex_lock(&fn
->fou_lock
);
1261 list_for_each_entry_safe(fou
, next
, &fn
->fou_list
, list
)
1263 mutex_unlock(&fn
->fou_lock
);
1266 static struct pernet_operations fou_net_ops
= {
1267 .init
= fou_init_net
,
1268 .exit
= fou_exit_net
,
1270 .size
= sizeof(struct fou_net
),
1273 static int __init
fou_init(void)
1277 ret
= register_pernet_device(&fou_net_ops
);
1281 ret
= genl_register_family(&fou_nl_family
);
1285 ret
= ip_tunnel_encap_add_fou_ops();
1289 genl_unregister_family(&fou_nl_family
);
1291 unregister_pernet_device(&fou_net_ops
);
1296 static void __exit
fou_fini(void)
1298 ip_tunnel_encap_del_fou_ops();
1299 genl_unregister_family(&fou_nl_family
);
1300 unregister_pernet_device(&fou_net_ops
);
1303 module_init(fou_init
);
1304 module_exit(fou_fini
);
1305 MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
1306 MODULE_LICENSE("GPL");