gro: Allow tunnel stacking in the case of FOU/GUE
net/ipv4/fou.c

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
#include <net/gue.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/xfrm.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>

struct fou {
        struct socket *sock;
        u8 protocol;
        u8 flags;
        __be16 port;
        u16 type;
        struct udp_offload udp_offloads;
        struct list_head list;
        struct rcu_head rcu;
};

#define FOU_F_REMCSUM_NOPARTIAL BIT(0)

struct fou_cfg {
        u16 type;
        u8 protocol;
        u8 flags;
        struct udp_port_cfg udp_config;
};

static unsigned int fou_net_id;

struct fou_net {
        struct list_head fou_list;
        struct mutex fou_lock;
};

static inline struct fou *fou_from_sock(struct sock *sk)
{
        return sk->sk_user_data;
}

static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
        struct iphdr *iph = ip_hdr(skb);

        /* Remove 'len' bytes from the packet (UDP header and
         * FOU header if present).
         */
        iph->tot_len = htons(ntohs(iph->tot_len) - len);
        __skb_pull(skb, len);
        skb_postpull_rcsum(skb, udp_hdr(skb), len);
        skb_reset_transport_header(skb);
}

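/* UDP encap_rcv handler for direct FOU. Note the encap_rcv return
 * convention: 1 means "not for us, process as plain UDP", 0 means the
 * skb was consumed, and a negative value asks the IP layer to resubmit
 * the packet with IP protocol -(return value).
 */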
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
{
        struct fou *fou = fou_from_sock(sk);

        if (!fou)
                return 1;

        fou_recv_pull(skb, sizeof(struct udphdr));

        return -fou->protocol;
}

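/* Fold a GUE remote-checksum-offload option into the packet: make sure
 * the checksum field that will be rewritten is in the linear data, then
 * let skb_remcsum_process() complete the deferred checksum.
 */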
static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
                                  void *data, size_t hdrlen, u8 ipproto,
                                  bool nopartial)
{
        __be16 *pd = data;
        size_t start = ntohs(pd[0]);
        size_t offset = ntohs(pd[1]);
        size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

        if (!pskb_may_pull(skb, plen))
                return NULL;
        guehdr = (struct guehdr *)&udp_hdr(skb)[1];

        skb_remcsum_process(skb, (void *)guehdr + hdrlen,
                            start, offset, nopartial);

        return guehdr;
}

static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
{
        /* No support yet */
        kfree_skb(skb);
        return 0;
}

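/* UDP encap_rcv handler for GUE. Validates the GUE header, processes
 * private flags (currently only remote checksum offload), strips the
 * UDP and GUE headers, and returns -proto_ctype so that the inner
 * packet is resubmitted to the IP stack with that protocol.
 */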
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
        struct fou *fou = fou_from_sock(sk);
        size_t len, optlen, hdrlen;
        struct guehdr *guehdr;
        void *data;
        u16 doffset = 0;

        if (!fou)
                return 1;

        len = sizeof(struct udphdr) + sizeof(struct guehdr);
        if (!pskb_may_pull(skb, len))
                goto drop;

        guehdr = (struct guehdr *)&udp_hdr(skb)[1];

        optlen = guehdr->hlen << 2;
        len += optlen;

        if (!pskb_may_pull(skb, len))
                goto drop;

        /* guehdr may change after pull */
        guehdr = (struct guehdr *)&udp_hdr(skb)[1];

        if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
                goto drop;

        hdrlen = sizeof(struct guehdr) + optlen;

        ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);

        /* Pull csum through the guehdr now. This can be used if
         * there is a remote checksum offload.
         */
        skb_postpull_rcsum(skb, udp_hdr(skb), len);

        data = &guehdr[1];

        if (guehdr->flags & GUE_FLAG_PRIV) {
                __be32 flags = *(__be32 *)(data + doffset);

                doffset += GUE_LEN_PRIV;

                if (flags & GUE_PFLAG_REMCSUM) {
                        guehdr = gue_remcsum(skb, guehdr, data + doffset,
                                             hdrlen, guehdr->proto_ctype,
                                             !!(fou->flags &
                                                FOU_F_REMCSUM_NOPARTIAL));
                        if (!guehdr)
                                goto drop;

                        data = &guehdr[1];

                        doffset += GUE_PLEN_REMCSUM;
                }
        }

        if (unlikely(guehdr->control))
                return gue_control_message(skb, guehdr);

        __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
        skb_reset_transport_header(skb);

        return -guehdr->proto_ctype;

drop:
        kfree_skb(skb);
        return 0;
}

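/* GRO receive handler for direct FOU: the encapsulated packet starts
 * immediately after the UDP header, so simply hand off to the GRO
 * handler registered for the inner IP protocol.
 */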
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
                                        struct sk_buff *skb,
                                        struct udp_offload *uoff)
{
        const struct net_offload *ops;
        struct sk_buff **pp = NULL;
        u8 proto = NAPI_GRO_CB(skb)->proto;
        const struct net_offload **offloads;

        /* We can clear the encap_mark for FOU as we are essentially doing
         * one of two possible things.  We are either adding an L4 tunnel
         * header to the outer L3 tunnel header, or we are simply
         * treating the GRE tunnel header as though it is a UDP protocol
         * specific header such as VXLAN or GENEVE.
         */
        NAPI_GRO_CB(skb)->encap_mark = 0;

        rcu_read_lock();
        offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
        ops = rcu_dereference(offloads[proto]);
        if (!ops || !ops->callbacks.gro_receive)
                goto out_unlock;

        pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);

out_unlock:
        rcu_read_unlock();

        return pp;
}

static int fou_gro_complete(struct sk_buff *skb, int nhoff,
                            struct udp_offload *uoff)
{
        const struct net_offload *ops;
        u8 proto = NAPI_GRO_CB(skb)->proto;
        int err = -ENOSYS;
        const struct net_offload **offloads;

        udp_tunnel_gro_complete(skb, nhoff);

        rcu_read_lock();
        offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
        ops = rcu_dereference(offloads[proto]);
        if (WARN_ON(!ops || !ops->callbacks.gro_complete))
                goto out_unlock;

        err = ops->callbacks.gro_complete(skb, nhoff);

out_unlock:
        rcu_read_unlock();

        return err;
}

static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
                                      struct guehdr *guehdr, void *data,
                                      size_t hdrlen, u8 ipproto,
                                      struct gro_remcsum *grc, bool nopartial)
{
        __be16 *pd = data;
        size_t start = ntohs(pd[0]);
        size_t offset = ntohs(pd[1]);
        size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);

        if (skb->remcsum_offload)
                return NULL;

        if (!NAPI_GRO_CB(skb)->csum_valid)
                return NULL;

        /* Pull checksum that will be written */
        if (skb_gro_header_hard(skb, off + plen)) {
                guehdr = skb_gro_header_slow(skb, off + plen, off);
                if (!guehdr)
                        return NULL;
        }

        skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
                                start, offset, grc, nopartial);

        skb->remcsum_offload = 1;

        return guehdr;
}

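/* GRO receive handler for GUE. Packets can only be merged if their
 * base GUE headers and any options match exactly; the payload is then
 * handed to the GRO handler for the protocol in proto_ctype.
 */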
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
                                        struct sk_buff *skb,
                                        struct udp_offload *uoff)
{
        const struct net_offload **offloads;
        const struct net_offload *ops;
        struct sk_buff **pp = NULL;
        struct sk_buff *p;
        struct guehdr *guehdr;
        size_t len, optlen, hdrlen, off;
        void *data;
        u16 doffset = 0;
        int flush = 1;
        struct fou *fou = container_of(uoff, struct fou, udp_offloads);
        struct gro_remcsum grc;

        skb_gro_remcsum_init(&grc);

        off = skb_gro_offset(skb);
        len = off + sizeof(*guehdr);

        guehdr = skb_gro_header_fast(skb, off);
        if (skb_gro_header_hard(skb, len)) {
                guehdr = skb_gro_header_slow(skb, len, off);
                if (unlikely(!guehdr))
                        goto out;
        }

        optlen = guehdr->hlen << 2;
        len += optlen;

        if (skb_gro_header_hard(skb, len)) {
                guehdr = skb_gro_header_slow(skb, len, off);
                if (unlikely(!guehdr))
                        goto out;
        }

        if (unlikely(guehdr->control) || guehdr->version != 0 ||
            validate_gue_flags(guehdr, optlen))
                goto out;

        hdrlen = sizeof(*guehdr) + optlen;

        /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr;
         * this is needed if there is a remote checksum offload.
         */
        skb_gro_postpull_rcsum(skb, guehdr, hdrlen);

        data = &guehdr[1];

        if (guehdr->flags & GUE_FLAG_PRIV) {
                __be32 flags = *(__be32 *)(data + doffset);

                doffset += GUE_LEN_PRIV;

                if (flags & GUE_PFLAG_REMCSUM) {
                        guehdr = gue_gro_remcsum(skb, off, guehdr,
                                                 data + doffset, hdrlen,
                                                 guehdr->proto_ctype, &grc,
                                                 !!(fou->flags &
                                                    FOU_F_REMCSUM_NOPARTIAL));
                        if (!guehdr)
                                goto out;

                        data = &guehdr[1];

                        doffset += GUE_PLEN_REMCSUM;
                }
        }

        skb_gro_pull(skb, hdrlen);

        flush = 0;

        for (p = *head; p; p = p->next) {
                const struct guehdr *guehdr2;

                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;

                guehdr2 = (struct guehdr *)(p->data + off);

                /* Compare base GUE header to be equal (covers
                 * hlen, version, proto_ctype, and flags).
                 */
                if (guehdr->word != guehdr2->word) {
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
                }

                /* Compare optional fields for equality. */
                if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
                                           guehdr->hlen << 2)) {
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
                }
        }

        /* We can clear the encap_mark for GUE as we are essentially doing
         * one of two possible things.  We are either adding an L4 tunnel
         * header to the outer L3 tunnel header, or we are simply
         * treating the GRE tunnel header as though it is a UDP protocol
         * specific header such as VXLAN or GENEVE.
         */
        NAPI_GRO_CB(skb)->encap_mark = 0;

        rcu_read_lock();
        offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
        ops = rcu_dereference(offloads[guehdr->proto_ctype]);
        if (WARN_ON(!ops || !ops->callbacks.gro_receive))
                goto out_unlock;

        pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);

out_unlock:
        rcu_read_unlock();
out:
        NAPI_GRO_CB(skb)->flush |= flush;
        skb_gro_remcsum_cleanup(skb, &grc);

        return pp;
}

static int gue_gro_complete(struct sk_buff *skb, int nhoff,
                            struct udp_offload *uoff)
{
        const struct net_offload **offloads;
        struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
        const struct net_offload *ops;
        unsigned int guehlen;
        u8 proto;
        int err = -ENOENT;

        proto = guehdr->proto_ctype;

        guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);

        rcu_read_lock();
        offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
        ops = rcu_dereference(offloads[proto]);
        if (WARN_ON(!ops || !ops->callbacks.gro_complete))
                goto out_unlock;

        err = ops->callbacks.gro_complete(skb, nhoff + guehlen);

out_unlock:
        rcu_read_unlock();
        return err;
}

static int fou_add_to_port_list(struct net *net, struct fou *fou)
{
        struct fou_net *fn = net_generic(net, fou_net_id);
        struct fou *fout;

        mutex_lock(&fn->fou_lock);
        list_for_each_entry(fout, &fn->fou_list, list) {
                if (fou->port == fout->port) {
                        mutex_unlock(&fn->fou_lock);
                        return -EALREADY;
                }
        }

        list_add(&fou->list, &fn->fou_list);
        mutex_unlock(&fn->fou_lock);

        return 0;
}

static void fou_release(struct fou *fou)
{
        struct socket *sock = fou->sock;
        struct sock *sk = sock->sk;

        if (sk->sk_family == AF_INET)
                udp_del_offload(&fou->udp_offloads);
        list_del(&fou->list);
        udp_tunnel_sock_release(sock);

        kfree_rcu(fou, rcu);
}

static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
        udp_sk(sk)->encap_rcv = fou_udp_recv;
        fou->protocol = cfg->protocol;
        fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
        fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
        fou->udp_offloads.port = cfg->udp_config.local_udp_port;
        fou->udp_offloads.ipproto = cfg->protocol;

        return 0;
}

static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
        udp_sk(sk)->encap_rcv = gue_udp_recv;
        fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
        fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
        fou->udp_offloads.port = cfg->udp_config.local_udp_port;

        return 0;
}

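/* Create a FOU/GUE listener: open a kernel UDP socket on the
 * configured port, attach the encap receive handler and GRO offloads,
 * and add the instance to the per-net list.
 */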
static int fou_create(struct net *net, struct fou_cfg *cfg,
                      struct socket **sockp)
{
        struct socket *sock = NULL;
        struct fou *fou = NULL;
        struct sock *sk;
        int err;

        /* Open UDP socket */
        err = udp_sock_create(net, &cfg->udp_config, &sock);
        if (err < 0)
                goto error;

        /* Allocate FOU port structure */
        fou = kzalloc(sizeof(*fou), GFP_KERNEL);
        if (!fou) {
                err = -ENOMEM;
                goto error;
        }

        sk = sock->sk;

        fou->flags = cfg->flags;
        fou->port = cfg->udp_config.local_udp_port;

        /* Initialize fou type */
        switch (cfg->type) {
        case FOU_ENCAP_DIRECT:
                err = fou_encap_init(sk, fou, cfg);
                if (err)
                        goto error;
                break;
        case FOU_ENCAP_GUE:
                err = gue_encap_init(sk, fou, cfg);
                if (err)
                        goto error;
                break;
        default:
                err = -EINVAL;
                goto error;
        }

        fou->type = cfg->type;

        udp_sk(sk)->encap_type = 1;
        udp_encap_enable();

        sk->sk_user_data = fou;
        fou->sock = sock;

        inet_inc_convert_csum(sk);

        sk->sk_allocation = GFP_ATOMIC;

        if (cfg->udp_config.family == AF_INET) {
                err = udp_add_offload(&fou->udp_offloads);
                if (err)
                        goto error;
        }

        err = fou_add_to_port_list(net, fou);
        if (err)
                goto error;

        if (sockp)
                *sockp = sock;

        return 0;

error:
        kfree(fou);
        if (sock)
                udp_tunnel_sock_release(sock);

        return err;
}

static int fou_destroy(struct net *net, struct fou_cfg *cfg)
{
        struct fou_net *fn = net_generic(net, fou_net_id);
        __be16 port = cfg->udp_config.local_udp_port;
        int err = -EINVAL;
        struct fou *fou;

        mutex_lock(&fn->fou_lock);
        list_for_each_entry(fou, &fn->fou_list, list) {
                if (fou->port == port) {
                        fou_release(fou);
                        err = 0;
                        break;
                }
        }
        mutex_unlock(&fn->fou_lock);

        return err;
}

static struct genl_family fou_nl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = 0,
        .name = FOU_GENL_NAME,
        .version = FOU_GENL_VERSION,
        .maxattr = FOU_ATTR_MAX,
        .netnsok = true,
};

static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
        [FOU_ATTR_PORT] = { .type = NLA_U16, },
        [FOU_ATTR_AF] = { .type = NLA_U8, },
        [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
        [FOU_ATTR_TYPE] = { .type = NLA_U8, },
        [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
};

static int parse_nl_config(struct genl_info *info,
                           struct fou_cfg *cfg)
{
        memset(cfg, 0, sizeof(*cfg));

        cfg->udp_config.family = AF_INET;

        if (info->attrs[FOU_ATTR_AF]) {
                u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);

                if (family != AF_INET && family != AF_INET6)
                        return -EINVAL;

                cfg->udp_config.family = family;
        }

        if (info->attrs[FOU_ATTR_PORT]) {
                __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);

                cfg->udp_config.local_udp_port = port;
        }

        if (info->attrs[FOU_ATTR_IPPROTO])
                cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);

        if (info->attrs[FOU_ATTR_TYPE])
                cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);

        if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
                cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;

        return 0;
}

static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = genl_info_net(info);
        struct fou_cfg cfg;
        int err;

        err = parse_nl_config(info, &cfg);
        if (err)
                return err;

        return fou_create(net, &cfg, NULL);
}

static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = genl_info_net(info);
        struct fou_cfg cfg;
        int err;

        err = parse_nl_config(info, &cfg);
        if (err)
                return err;

        return fou_destroy(net, &cfg);
}

static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
{
        if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
            nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
            nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
            nla_put_u8(msg, FOU_ATTR_TYPE, fou->type))
                return -1;

        if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
                if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
                        return -1;
        return 0;
}

static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
                         u32 flags, struct sk_buff *skb, u8 cmd)
{
        void *hdr;

        hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
        if (!hdr)
                return -ENOMEM;

        if (fou_fill_info(fou, skb) < 0)
                goto nla_put_failure;

        genlmsg_end(skb, hdr);
        return 0;

nla_put_failure:
        genlmsg_cancel(skb, hdr);
        return -EMSGSIZE;
}

static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = genl_info_net(info);
        struct fou_net *fn = net_generic(net, fou_net_id);
        struct sk_buff *msg;
        struct fou_cfg cfg;
        struct fou *fout;
        __be16 port;
        int ret;

        ret = parse_nl_config(info, &cfg);
        if (ret)
                return ret;
        port = cfg.udp_config.local_udp_port;
        if (port == 0)
                return -EINVAL;

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
                return -ENOMEM;

        ret = -ESRCH;
        mutex_lock(&fn->fou_lock);
        list_for_each_entry(fout, &fn->fou_list, list) {
                if (port == fout->port) {
                        ret = fou_dump_info(fout, info->snd_portid,
                                            info->snd_seq, 0, msg,
                                            info->genlhdr->cmd);
                        break;
                }
        }
        mutex_unlock(&fn->fou_lock);
        if (ret < 0)
                goto out_free;

        return genlmsg_reply(msg, info);

out_free:
        nlmsg_free(msg);
        return ret;
}

static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct fou_net *fn = net_generic(net, fou_net_id);
        struct fou *fout;
        int idx = 0, ret;

        mutex_lock(&fn->fou_lock);
        list_for_each_entry(fout, &fn->fou_list, list) {
                if (idx++ < cb->args[0])
                        continue;
                ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
                                    cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                    skb, FOU_CMD_GET);
                if (ret)
                        break;
        }
        mutex_unlock(&fn->fou_lock);

        cb->args[0] = idx;
        return skb->len;
}

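/* Genetlink operations exposed to userspace. With a recent iproute2
 * these map to commands such as "ip fou add port 5555 gue" (an
 * illustrative example of the userspace side, not taken from this
 * file).
 */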
static const struct genl_ops fou_nl_ops[] = {
        {
                .cmd = FOU_CMD_ADD,
                .doit = fou_nl_cmd_add_port,
                .policy = fou_nl_policy,
                .flags = GENL_ADMIN_PERM,
        },
        {
                .cmd = FOU_CMD_DEL,
                .doit = fou_nl_cmd_rm_port,
                .policy = fou_nl_policy,
                .flags = GENL_ADMIN_PERM,
        },
        {
                .cmd = FOU_CMD_GET,
                .doit = fou_nl_cmd_get_port,
                .dumpit = fou_nl_dump,
                .policy = fou_nl_policy,
        },
};

size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
        return sizeof(struct udphdr);
}
EXPORT_SYMBOL(fou_encap_hlen);

size_t gue_encap_hlen(struct ip_tunnel_encap *e)
{
        size_t len;
        bool need_priv = false;

        len = sizeof(struct udphdr) + sizeof(struct guehdr);

        if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
                len += GUE_PLEN_REMCSUM;
                need_priv = true;
        }

        len += need_priv ? GUE_LEN_PRIV : 0;

        return len;
}
EXPORT_SYMBOL(gue_encap_hlen);

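/* Append the outer UDP header for either encapsulation type and tell
 * the caller (via *protocol) that the outer IP protocol is now UDP.
 */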
static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
                          struct flowi4 *fl4, u8 *protocol, __be16 sport)
{
        struct udphdr *uh;

        skb_push(skb, sizeof(struct udphdr));
        skb_reset_transport_header(skb);

        uh = udp_hdr(skb);

        uh->dest = e->dport;
        uh->source = sport;
        uh->len = htons(skb->len);
        uh->check = 0;
        udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
                     fl4->saddr, fl4->daddr, skb->len);

        *protocol = IPPROTO_UDP;
}

int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
                     u8 *protocol, struct flowi4 *fl4)
{
        bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
        int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
        __be16 sport;

        skb = iptunnel_handle_offloads(skb, csum, type);

        if (IS_ERR(skb))
                return PTR_ERR(skb);

        sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
                                               skb, 0, 0, false);
        fou_build_udp(skb, e, fl4, protocol, sport);

        return 0;
}
EXPORT_SYMBOL(fou_build_header);

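/* Build the GUE variant of the header on transmit. If remote checksum
 * offload is requested and the inner checksum is still pending
 * (CHECKSUM_PARTIAL), a private REMCSUM option carrying the checksum
 * start/offset is emitted instead of computing an outer UDP checksum.
 */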
int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
                     u8 *protocol, struct flowi4 *fl4)
{
        bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
        int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
        struct guehdr *guehdr;
        size_t hdrlen, optlen = 0;
        __be16 sport;
        void *data;
        bool need_priv = false;

        if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
            skb->ip_summed == CHECKSUM_PARTIAL) {
                csum = false;
                optlen += GUE_PLEN_REMCSUM;
                type |= SKB_GSO_TUNNEL_REMCSUM;
                need_priv = true;
        }

        optlen += need_priv ? GUE_LEN_PRIV : 0;

        skb = iptunnel_handle_offloads(skb, csum, type);

        if (IS_ERR(skb))
                return PTR_ERR(skb);

        /* Get source port (based on flow hash) before skb_push */
        sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
                                               skb, 0, 0, false);

        hdrlen = sizeof(struct guehdr) + optlen;

        skb_push(skb, hdrlen);

        guehdr = (struct guehdr *)skb->data;

        guehdr->control = 0;
        guehdr->version = 0;
        guehdr->hlen = optlen >> 2;
        guehdr->flags = 0;
        guehdr->proto_ctype = *protocol;

        data = &guehdr[1];

        if (need_priv) {
                __be32 *flags = data;

                guehdr->flags |= GUE_FLAG_PRIV;
                *flags = 0;
                data += GUE_LEN_PRIV;

                if (type & SKB_GSO_TUNNEL_REMCSUM) {
                        u16 csum_start = skb_checksum_start_offset(skb);
                        __be16 *pd = data;

                        if (csum_start < hdrlen)
                                return -EINVAL;

                        csum_start -= hdrlen;
                        pd[0] = htons(csum_start);
                        pd[1] = htons(csum_start + skb->csum_offset);

                        if (!skb_is_gso(skb)) {
                                skb->ip_summed = CHECKSUM_NONE;
                                skb->encapsulation = 0;
                        }

                        *flags |= GUE_PFLAG_REMCSUM;
                        data += GUE_PLEN_REMCSUM;
                }
        }

        fou_build_udp(skb, e, fl4, protocol, sport);

        return 0;
}
EXPORT_SYMBOL(gue_build_header);

#ifdef CONFIG_NET_FOU_IP_TUNNELS

static const struct ip_tunnel_encap_ops fou_iptun_ops = {
        .encap_hlen = fou_encap_hlen,
        .build_header = fou_build_header,
};

static const struct ip_tunnel_encap_ops gue_iptun_ops = {
        .encap_hlen = gue_encap_hlen,
        .build_header = gue_build_header,
};

static int ip_tunnel_encap_add_fou_ops(void)
{
        int ret;

        ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
        if (ret < 0) {
                pr_err("can't add fou ops\n");
                return ret;
        }

        ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
        if (ret < 0) {
                pr_err("can't add gue ops\n");
                ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
                return ret;
        }

        return 0;
}

static void ip_tunnel_encap_del_fou_ops(void)
{
        ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
        ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
}

#else

static int ip_tunnel_encap_add_fou_ops(void)
{
        return 0;
}

static void ip_tunnel_encap_del_fou_ops(void)
{
}

#endif

static __net_init int fou_init_net(struct net *net)
{
        struct fou_net *fn = net_generic(net, fou_net_id);

        INIT_LIST_HEAD(&fn->fou_list);
        mutex_init(&fn->fou_lock);
        return 0;
}

static __net_exit void fou_exit_net(struct net *net)
{
        struct fou_net *fn = net_generic(net, fou_net_id);
        struct fou *fou, *next;

        /* Close all the FOU sockets */
        mutex_lock(&fn->fou_lock);
        list_for_each_entry_safe(fou, next, &fn->fou_list, list)
                fou_release(fou);
        mutex_unlock(&fn->fou_lock);
}

static struct pernet_operations fou_net_ops = {
        .init = fou_init_net,
        .exit = fou_exit_net,
        .id   = &fou_net_id,
        .size = sizeof(struct fou_net),
};

static int __init fou_init(void)
{
        int ret;

        ret = register_pernet_device(&fou_net_ops);
        if (ret)
                goto exit;

        ret = genl_register_family_with_ops(&fou_nl_family,
                                            fou_nl_ops);
        if (ret < 0)
                goto unregister;

        ret = ip_tunnel_encap_add_fou_ops();
        if (ret == 0)
                return 0;

        genl_unregister_family(&fou_nl_family);
unregister:
        unregister_pernet_device(&fou_net_ops);
exit:
        return ret;
}

static void __exit fou_fini(void)
{
        ip_tunnel_encap_del_fou_ops();
        genl_unregister_family(&fou_nl_family);
        unregister_pernet_device(&fou_net_ops);
}

module_init(fou_init);
module_exit(fou_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");