#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
#include <net/gue.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
static DEFINE_SPINLOCK(fou_lock);
static LIST_HEAD(fou_list);
struct fou {
	struct socket *sock;
	u8 protocol;
	u8 flags;
	u16 port;
	struct udp_offload udp_offloads;
	struct list_head list;
};

#define FOU_F_REMCSUM_NOPARTIAL BIT(0)

struct fou_cfg {
	u16 type;
	u8 protocol;
	u8 flags;
	struct udp_port_cfg udp_config;
};
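/* A fou_cfg describes one FOU listener port: parse_nl_config() fills
 * it from FOU_ATTR_* netlink attributes, and fou_create() then opens
 * the kernel UDP socket and installs the encap_rcv hook for it.
 */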
static inline struct fou *fou_from_sock(struct sock *sk)
{
	return sk->sk_user_data;
}
static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
	struct iphdr *iph = ip_hdr(skb);

	/* Remove 'len' bytes from the packet (UDP header and
	 * FOU header if present).
	 */
	iph->tot_len = htons(ntohs(iph->tot_len) - len);
	__skb_pull(skb, len);
	skb_postpull_rcsum(skb, udp_hdr(skb), len);
	skb_reset_transport_header(skb);
}
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);

	if (!fou)
		return 1;

	fou_recv_pull(skb, sizeof(struct udphdr));

	return -fou->protocol;
}
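/* An encap_rcv handler returning a negative value makes the UDP stack
 * hand the packet back to the IP layer with the negated value as the
 * new protocol number, so returning -fou->protocol here delivers the
 * decapsulated packet straight to the configured inner protocol.
 */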
static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
				  void *data, size_t hdrlen, u8 ipproto,
				  bool nopartial)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

	if (!pskb_may_pull(skb, plen))
		return NULL;
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	skb_remcsum_process(skb, (void *)guehdr + hdrlen,
			    start, offset, nopartial);

	return guehdr;
}
static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
{
	/* No support yet */
	kfree_skb(skb);
	return 0;
}
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);
	size_t len, optlen, hdrlen;
	struct guehdr *guehdr;
	void *data;
	u16 doffset = 0;

	if (!fou)
		return 1;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);
	if (!pskb_may_pull(skb, len))
		goto drop;

	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	optlen = guehdr->hlen << 2;
	len += optlen;

	if (!pskb_may_pull(skb, len))
		goto drop;

	/* guehdr may change after pull */
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
		goto drop;

	hdrlen = sizeof(struct guehdr) + optlen;

	ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);

	/* Pull csum through the guehdr now. This can be used if
	 * there is a remote checksum offload.
	 */
	skb_postpull_rcsum(skb, udp_hdr(skb), len);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_remcsum(skb, guehdr, data + doffset,
					     hdrlen, guehdr->proto_ctype,
					     !!(fou->flags &
						FOU_F_REMCSUM_NOPARTIAL));
			if (!guehdr)
				goto drop;

			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	if (unlikely(guehdr->control))
		return gue_control_message(skb, guehdr);

	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
	skb_reset_transport_header(skb);

	return -guehdr->proto_ctype;

drop:
	kfree_skb(skb);
	return 0;
}
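/* Unlike plain FOU, GUE learns the inner protocol from the GUE header
 * itself (proto_ctype) rather than from static configuration, and
 * packets with guehdr->control set are diverted to
 * gue_control_message() instead of being decapsulated.
 */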
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
					struct sk_buff *skb,
					struct udp_offload *uoff)
{
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	u8 proto = NAPI_GRO_CB(skb)->proto;
	const struct net_offload **offloads;

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive)
		goto out_unlock;

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();

	return pp;
}
static int fou_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
{
	const struct net_offload *ops;
	u8 proto = NAPI_GRO_CB(skb)->proto;
	int err = -ENOSYS;
	const struct net_offload **offloads;

	udp_tunnel_gro_complete(skb, nhoff);

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff);

out_unlock:
	rcu_read_unlock();

	return err;
}
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
				      struct guehdr *guehdr, void *data,
				      size_t hdrlen, u8 ipproto,
				      struct gro_remcsum *grc, bool nopartial)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

	if (skb->remcsum_offload)
		return NULL;

	if (!NAPI_GRO_CB(skb)->csum_valid)
		return NULL;

	/* Pull checksum that will be written */
	if (skb_gro_header_hard(skb, off + plen)) {
		guehdr = skb_gro_header_slow(skb, off + plen, off);
		if (!guehdr)
			return NULL;
	}

	skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
				start, offset, grc, nopartial);

	skb->remcsum_offload = 1;

	return guehdr;
}
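/* skb->remcsum_offload doubles as a "remote checksum already
 * processed" marker: it is checked at entry above, so an skb that is
 * visited again is not checksum-patched twice.
 */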
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
					struct sk_buff *skb,
					struct udp_offload *uoff)
{
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	struct guehdr *guehdr;
	size_t len, optlen, hdrlen, off;
	void *data;
	u16 doffset = 0;
	int flush = 1;
	struct fou *fou = container_of(uoff, struct fou, udp_offloads);
	struct gro_remcsum grc;

	skb_gro_remcsum_init(&grc);

	off = skb_gro_offset(skb);
	len = off + sizeof(*guehdr);

	guehdr = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	optlen = guehdr->hlen << 2;
	len += optlen;

	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	if (unlikely(guehdr->control) || guehdr->version != 0 ||
	    validate_gue_flags(guehdr, optlen))
		goto out;

	hdrlen = sizeof(*guehdr) + optlen;

	/* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
	 * this is needed if there is a remote checksum offload.
	 */
	skb_gro_postpull_rcsum(skb, guehdr, hdrlen);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_gro_remcsum(skb, off, guehdr,
						 data + doffset, hdrlen,
						 guehdr->proto_ctype, &grc,
						 !!(fou->flags &
						    FOU_F_REMCSUM_NOPARTIAL));
			if (!guehdr)
				goto out;

			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	skb_gro_pull(skb, hdrlen);

	flush = 0;

	for (p = *head; p; p = p->next) {
		const struct guehdr *guehdr2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		guehdr2 = (struct guehdr *)(p->data + off);

		/* Compare base GUE header to be equal (covers
		 * hlen, version, proto_ctype, and flags).
		 */
		if (guehdr->word != guehdr2->word) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* Compare that the optional fields are the same. */
		if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
					   guehdr->hlen << 2)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[guehdr->proto_ctype]);
	if (WARN_ON(!ops || !ops->callbacks.gro_receive))
		goto out_unlock;

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;
	skb_gro_remcsum_cleanup(skb, &grc);

	return pp;
}
static int gue_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
{
	const struct net_offload **offloads;
	struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
	const struct net_offload *ops;
	unsigned int guehlen;
	u8 proto;
	int err = -ENOENT;

	proto = guehdr->proto_ctype;

	guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff + guehlen);

out_unlock:
	rcu_read_unlock();
	return err;
}
static int fou_add_to_port_list(struct fou *fou)
{
	struct fou *fout;

	spin_lock(&fou_lock);
	list_for_each_entry(fout, &fou_list, list) {
		if (fou->port == fout->port) {
			spin_unlock(&fou_lock);
			return -EALREADY;
		}
	}

	list_add(&fou->list, &fou_list);
	spin_unlock(&fou_lock);

	return 0;
}
static void fou_release(struct fou *fou)
{
	struct socket *sock = fou->sock;
	struct sock *sk = sock->sk;

	if (sk->sk_family == AF_INET)
		udp_del_offload(&fou->udp_offloads);

	list_del(&fou->list);

	/* Remove hooks into tunnel socket */
	sk->sk_user_data = NULL;

	sock_release(sock);

	kfree(fou);
}
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	udp_sk(sk)->encap_rcv = fou_udp_recv;
	fou->protocol = cfg->protocol;
	fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
	fou->udp_offloads.ipproto = cfg->protocol;

	return 0;
}
static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	udp_sk(sk)->encap_rcv = gue_udp_recv;
	fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;

	return 0;
}
static int fou_create(struct net *net, struct fou_cfg *cfg,
		      struct socket **sockp)
{
	struct fou *fou = NULL;
	int err;
	struct socket *sock = NULL;
	struct sock *sk;

	/* Open UDP socket */
	err = udp_sock_create(net, &cfg->udp_config, &sock);
	if (err < 0)
		goto error;

	/* Allocate FOU port structure */
	fou = kzalloc(sizeof(*fou), GFP_KERNEL);
	if (!fou) {
		err = -ENOMEM;
		goto error;
	}

	sk = sock->sk;

	fou->flags = cfg->flags;
	fou->port = cfg->udp_config.local_udp_port;

	/* Initialize for the configured FOU type */
	switch (cfg->type) {
	case FOU_ENCAP_DIRECT:
		err = fou_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	case FOU_ENCAP_GUE:
		err = gue_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	default:
		err = -EINVAL;
		goto error;
	}

	udp_sk(sk)->encap_type = 1;
	udp_encap_enable();

	sk->sk_user_data = fou;
	fou->sock = sock;

	inet_inc_convert_csum(sk);

	sk->sk_allocation = GFP_ATOMIC;

	if (cfg->udp_config.family == AF_INET) {
		err = udp_add_offload(&fou->udp_offloads);
		if (err)
			goto error;
	}

	err = fou_add_to_port_list(fou);
	if (err)
		goto error;

	if (sockp)
		*sockp = sock;

	return 0;

error:
	kfree(fou);
	if (sock)
		sock_release(sock);

	return err;
}
static int fou_destroy(struct net *net, struct fou_cfg *cfg)
{
	struct fou *fou;
	u16 port = cfg->udp_config.local_udp_port;
	int err = -EINVAL;

	spin_lock(&fou_lock);
	list_for_each_entry(fou, &fou_list, list) {
		if (fou->port == port) {
			fou_release(fou);
			err = 0;
			break;
		}
	}
	spin_unlock(&fou_lock);

	return err;
}
static struct genl_family fou_nl_family = {
	.id		= GENL_ID_GENERATE,
	.hdrsize	= 0,
	.name		= FOU_GENL_NAME,
	.version	= FOU_GENL_VERSION,
	.maxattr	= FOU_ATTR_MAX,
	.netnsok	= true,
};
static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
	[FOU_ATTR_PORT] = { .type = NLA_U16, },
	[FOU_ATTR_AF] = { .type = NLA_U8, },
	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
	[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
};
static int parse_nl_config(struct genl_info *info,
			   struct fou_cfg *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->udp_config.family = AF_INET;

	if (info->attrs[FOU_ATTR_AF]) {
		u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);

		if (family != AF_INET && family != AF_INET6)
			return -EINVAL;

		cfg->udp_config.family = family;
	}

	if (info->attrs[FOU_ATTR_PORT]) {
		u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);

		cfg->udp_config.local_udp_port = port;
	}

	if (info->attrs[FOU_ATTR_IPPROTO])
		cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);

	if (info->attrs[FOU_ATTR_TYPE])
		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);

	if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
		cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;

	return 0;
}
static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
{
	struct fou_cfg cfg;
	int err;

	err = parse_nl_config(info, &cfg);
	if (err)
		return err;

	return fou_create(&init_net, &cfg, NULL);
}
static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
{
	struct fou_cfg cfg;
	int err;

	err = parse_nl_config(info, &cfg);
	if (err)
		return err;

	return fou_destroy(&init_net, &cfg);
}
static const struct genl_ops fou_nl_ops[] = {
	{
		.cmd = FOU_CMD_ADD,
		.doit = fou_nl_cmd_add_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = FOU_CMD_DEL,
		.doit = fou_nl_cmd_rm_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
};
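/* Example usage (assuming a contemporary iproute2 build with FOU
 * support; "ip fou" speaks this generic netlink family):
 *
 *	ip fou add port 5555 gue        # FOU_CMD_ADD, FOU_ENCAP_GUE
 *	ip fou add port 7777 ipproto 4  # FOU_CMD_ADD, direct IPIP encap
 *	ip fou del port 5555            # FOU_CMD_DEL
 */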
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct udphdr);
}
EXPORT_SYMBOL(fou_encap_hlen);
size_t gue_encap_hlen(struct ip_tunnel_encap *e)
{
	size_t len;
	bool need_priv = false;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);

	if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
		len += GUE_PLEN_REMCSUM;
		need_priv = true;
	}

	len += need_priv ? GUE_LEN_PRIV : 0;

	return len;
}
EXPORT_SYMBOL(gue_encap_hlen);
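/* Worked example of the arithmetic above, assuming the usual field
 * sizes in this file: a GUE header with remote checksum offload costs
 * sizeof(struct udphdr) (8) + sizeof(struct guehdr) (4) +
 * GUE_LEN_PRIV (4) + GUE_PLEN_REMCSUM (4) = 20 bytes of overhead;
 * without remcsum it is just 8 + 4 = 12.
 */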
static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
			  struct flowi4 *fl4, u8 *protocol, __be16 sport)
{
	struct udphdr *uh;

	skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);

	uh = udp_hdr(skb);

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;
}
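/* The first argument to udp_set_csum() is "nocheck": if the user did
 * not set TUNNEL_ENCAP_FLAG_CSUM, the outer UDP checksum is left as
 * zero, which is permitted for UDP over IPv4.
 */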
int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	__be16 sport;

	skb = iptunnel_handle_offloads(skb, csum, type);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);
	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(fou_build_header);
int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	struct guehdr *guehdr;
	size_t hdrlen, optlen = 0;
	__be16 sport;
	void *data;
	bool need_priv = false;

	if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		csum = false;
		optlen += GUE_PLEN_REMCSUM;
		type |= SKB_GSO_TUNNEL_REMCSUM;
		need_priv = true;
	}

	optlen += need_priv ? GUE_LEN_PRIV : 0;

	skb = iptunnel_handle_offloads(skb, csum, type);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get source port (based on flow hash) before skb_push */
	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	hdrlen = sizeof(struct guehdr) + optlen;

	skb_push(skb, hdrlen);

	guehdr = (struct guehdr *)skb->data;

	guehdr->control = 0;
	guehdr->version = 0;
	guehdr->hlen = optlen >> 2;
	guehdr->flags = 0;
	guehdr->proto_ctype = *protocol;

	data = &guehdr[1];

	if (need_priv) {
		__be32 *flags = data;

		guehdr->flags |= GUE_FLAG_PRIV;
		*flags = 0;
		data += GUE_LEN_PRIV;

		if (type & SKB_GSO_TUNNEL_REMCSUM) {
			u16 csum_start = skb_checksum_start_offset(skb);
			__be16 *pd = data;

			if (csum_start < hdrlen)
				return -EINVAL;

			csum_start -= hdrlen;
			pd[0] = htons(csum_start);
			pd[1] = htons(csum_start + skb->csum_offset);

			if (!skb_is_gso(skb)) {
				skb->ip_summed = CHECKSUM_NONE;
				skb->encapsulation = 0;
			}

			*flags |= GUE_PFLAG_REMCSUM;
			data += GUE_PLEN_REMCSUM;
		}
	}

	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(gue_build_header);
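/* Wire layout produced above for the remcsum case (a sketch from the
 * assignments in this function): a 4-byte GUE base header with
 * hlen = 2 (eight option bytes follow), then the 4-byte private flags
 * word with GUE_PFLAG_REMCSUM set, then two 16-bit values giving the
 * checksum start and offset relative to the end of the GUE header.
 */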
#ifdef CONFIG_NET_FOU_IP_TUNNELS

static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
	.encap_hlen = fou_encap_hlen,
	.build_header = fou_build_header,
};

static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
	.encap_hlen = gue_encap_hlen,
	.build_header = gue_build_header,
};
static int ip_tunnel_encap_add_fou_ops(void)
{
	int ret;

	ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	if (ret < 0) {
		pr_err("can't add fou ops\n");
		return ret;
	}

	ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
	if (ret < 0) {
		pr_err("can't add gue ops\n");
		ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
		return ret;
	}

	return 0;
}
static void ip_tunnel_encap_del_fou_ops(void)
{
	ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
}
#else

static int ip_tunnel_encap_add_fou_ops(void)
{
	return 0;
}

static void ip_tunnel_encap_del_fou_ops(void)
{
}

#endif
static int __init fou_init(void)
{
	int ret;

	ret = genl_register_family_with_ops(&fou_nl_family,
					    fou_nl_ops);
	if (ret < 0)
		goto exit;

	ret = ip_tunnel_encap_add_fou_ops();
	if (ret < 0)
		genl_unregister_family(&fou_nl_family);

exit:
	return ret;
}
static void __exit fou_fini(void)
{
	struct fou *fou, *next;

	ip_tunnel_encap_del_fou_ops();

	genl_unregister_family(&fou_nl_family);

	/* Close all the FOU sockets */
	spin_lock(&fou_lock);
	list_for_each_entry_safe(fou, next, &fou_list, list)
		fou_release(fou);
	spin_unlock(&fou_lock);
}
module_init(fou_init);
module_exit(fou_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");