ALSA: hda - Adding one more ALC255 pin definition for headset problem
[linux/fpc-iii.git] / net / ipv4 / ip_gre.c
blob113cc43df789a34b80fcf897621c936ff0cd6ca8
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 #include <net/dst_metadata.h>
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution
   at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   the fatal route to the network, even if it were you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
109 static bool log_ecn_error = true;
110 module_param(log_ecn_error, bool, 0644);
111 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
113 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
114 static int ipgre_tunnel_init(struct net_device *dev);
116 static int ipgre_net_id __read_mostly;
117 static int gre_tap_net_id __read_mostly;
119 static void ipgre_err(struct sk_buff *skb, u32 info,
120 const struct tnl_ptk_info *tpi)
123 /* All the routers (except for Linux) return only
124 8 bytes of packet payload. It means, that precise relaying of
125 ICMP in the real Internet is absolutely infeasible.
127 Moreover, Cisco "wise men" put GRE key to the third word
128 in GRE header. It makes impossible maintaining even soft
129 state for keyed GRE tunnels with enabled checksum. Tell
130 them "thank you".
132 Well, I wonder, rfc1812 was written by Cisco employee,
133 what the hell these idiots break standards established
134 by themselves???
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn;
138 const struct iphdr *iph;
139 const int type = icmp_hdr(skb)->type;
140 const int code = icmp_hdr(skb)->code;
141 unsigned int data_len = 0;
142 struct ip_tunnel *t;
144 switch (type) {
145 default:
146 case ICMP_PARAMETERPROB:
147 return;
149 case ICMP_DEST_UNREACH:
150 switch (code) {
151 case ICMP_SR_FAILED:
152 case ICMP_PORT_UNREACH:
153 /* Impossible event. */
154 return;
155 default:
156 /* All others are translated to HOST_UNREACH.
157 rfc2003 contains "deep thoughts" about NET_UNREACH,
158 I believe they are just ether pollution. --ANK
160 break;
162 break;
164 case ICMP_TIME_EXCEEDED:
165 if (code != ICMP_EXC_TTL)
166 return;
167 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
168 break;
170 case ICMP_REDIRECT:
171 break;
174 if (tpi->proto == htons(ETH_P_TEB))
175 itn = net_generic(net, gre_tap_net_id);
176 else
177 itn = net_generic(net, ipgre_net_id);
179 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
180 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
181 iph->daddr, iph->saddr, tpi->key);
183 if (!t)
184 return;
186 #if IS_ENABLED(CONFIG_IPV6)
187 if (tpi->proto == htons(ETH_P_IPV6) &&
188 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
189 type, data_len))
190 return;
191 #endif
193 if (t->parms.iph.daddr == 0 ||
194 ipv4_is_multicast(t->parms.iph.daddr))
195 return;
197 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
198 return;
200 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
201 t->err_count++;
202 else
203 t->err_count = 1;
204 t->err_time = jiffies;
207 static void gre_err(struct sk_buff *skb, u32 info)
209 /* All the routers (except for Linux) return only
210 * 8 bytes of packet payload. It means, that precise relaying of
211 * ICMP in the real Internet is absolutely infeasible.
213 * Moreover, Cisco "wise men" put GRE key to the third word
214 * in GRE header. It makes impossible maintaining even soft
215 * state for keyed
216 * GRE tunnels with enabled checksum. Tell them "thank you".
218 * Well, I wonder, rfc1812 was written by Cisco employee,
219 * what the hell these idiots break standards established
220 * by themselves???
223 const struct iphdr *iph = (struct iphdr *)skb->data;
224 const int type = icmp_hdr(skb)->type;
225 const int code = icmp_hdr(skb)->code;
226 struct tnl_ptk_info tpi;
227 bool csum_err = false;
229 if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
230 iph->ihl * 4) < 0) {
231 if (!csum_err) /* ignore csum errors. */
232 return;
235 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
236 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
237 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
238 return;
240 if (type == ICMP_REDIRECT) {
241 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
242 IPPROTO_GRE, 0);
243 return;
246 ipgre_err(skb, info, &tpi);
249 static __be64 key_to_tunnel_id(__be32 key)
251 #ifdef __BIG_ENDIAN
252 return (__force __be64)((__force u32)key);
253 #else
254 return (__force __be64)((__force u64)key << 32);
255 #endif
258 /* Returns the least-significant 32 bits of a __be64. */
259 static __be32 tunnel_id_to_key(__be64 x)
261 #ifdef __BIG_ENDIAN
262 return (__force __be32)x;
263 #else
264 return (__force __be32)((__force u64)x >> 32);
265 #endif
268 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
269 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
271 struct metadata_dst *tun_dst = NULL;
272 const struct iphdr *iph;
273 struct ip_tunnel *tunnel;
275 iph = ip_hdr(skb);
276 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
277 iph->saddr, iph->daddr, tpi->key);
279 if (tunnel) {
280 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
281 raw_proto, false) < 0)
282 goto drop;
284 if (tunnel->dev->type != ARPHRD_NONE)
285 skb_pop_mac_header(skb);
286 else
287 skb_reset_mac_header(skb);
288 if (tunnel->collect_md) {
289 __be16 flags;
290 __be64 tun_id;
292 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
293 tun_id = key_to_tunnel_id(tpi->key);
294 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
295 if (!tun_dst)
296 return PACKET_REJECT;
299 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
300 return PACKET_RCVD;
302 return PACKET_NEXT;
304 drop:
305 kfree_skb(skb);
306 return PACKET_RCVD;
309 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
310 int hdr_len)
312 struct net *net = dev_net(skb->dev);
313 struct ip_tunnel_net *itn;
314 int res;
316 if (tpi->proto == htons(ETH_P_TEB))
317 itn = net_generic(net, gre_tap_net_id);
318 else
319 itn = net_generic(net, ipgre_net_id);
321 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
322 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
323 /* ipgre tunnels in collect metadata mode should receive
324 * also ETH_P_TEB traffic.
326 itn = net_generic(net, ipgre_net_id);
327 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
329 return res;
332 static int gre_rcv(struct sk_buff *skb)
334 struct tnl_ptk_info tpi;
335 bool csum_err = false;
336 int hdr_len;
338 #ifdef CONFIG_NET_IPGRE_BROADCAST
339 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
340 /* Looped back packet, drop it! */
341 if (rt_is_output_route(skb_rtable(skb)))
342 goto drop;
344 #endif
346 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
347 if (hdr_len < 0)
348 goto drop;
350 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
351 return 0;
353 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
354 drop:
355 kfree_skb(skb);
356 return 0;
359 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
360 const struct iphdr *tnl_params,
361 __be16 proto)
363 struct ip_tunnel *tunnel = netdev_priv(dev);
365 if (tunnel->parms.o_flags & TUNNEL_SEQ)
366 tunnel->o_seqno++;
368 /* Push GRE header. */
369 gre_build_header(skb, tunnel->tun_hlen,
370 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
371 htonl(tunnel->o_seqno));
373 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
376 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
378 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
381 static struct rtable *gre_get_rt(struct sk_buff *skb,
382 struct net_device *dev,
383 struct flowi4 *fl,
384 const struct ip_tunnel_key *key)
386 struct net *net = dev_net(dev);
388 memset(fl, 0, sizeof(*fl));
389 fl->daddr = key->u.ipv4.dst;
390 fl->saddr = key->u.ipv4.src;
391 fl->flowi4_tos = RT_TOS(key->tos);
392 fl->flowi4_mark = skb->mark;
393 fl->flowi4_proto = IPPROTO_GRE;
395 return ip_route_output_key(net, fl);
398 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
399 __be16 proto)
401 struct ip_tunnel_info *tun_info;
402 const struct ip_tunnel_key *key;
403 struct rtable *rt = NULL;
404 struct flowi4 fl;
405 int min_headroom;
406 int tunnel_hlen;
407 __be16 df, flags;
408 bool use_cache;
409 int err;
411 tun_info = skb_tunnel_info(skb);
412 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
413 ip_tunnel_info_af(tun_info) != AF_INET))
414 goto err_free_skb;
416 key = &tun_info->key;
417 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
418 if (use_cache)
419 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
420 if (!rt) {
421 rt = gre_get_rt(skb, dev, &fl, key);
422 if (IS_ERR(rt))
423 goto err_free_skb;
424 if (use_cache)
425 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
426 fl.saddr);
429 tunnel_hlen = gre_calc_hlen(key->tun_flags);
431 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
432 + tunnel_hlen + sizeof(struct iphdr);
433 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
434 int head_delta = SKB_DATA_ALIGN(min_headroom -
435 skb_headroom(skb) +
436 16);
437 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
438 0, GFP_ATOMIC);
439 if (unlikely(err))
440 goto err_free_rt;
443 /* Push Tunnel header. */
444 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
445 goto err_free_rt;
447 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
448 gre_build_header(skb, tunnel_hlen, flags, proto,
449 tunnel_id_to_key(tun_info->key.tun_id), 0);
451 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
453 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
454 key->tos, key->ttl, df, false);
455 return;
457 err_free_rt:
458 ip_rt_put(rt);
459 err_free_skb:
460 kfree_skb(skb);
461 dev->stats.tx_dropped++;
464 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
466 struct ip_tunnel_info *info = skb_tunnel_info(skb);
467 struct rtable *rt;
468 struct flowi4 fl4;
470 if (ip_tunnel_info_af(info) != AF_INET)
471 return -EINVAL;
473 rt = gre_get_rt(skb, dev, &fl4, &info->key);
474 if (IS_ERR(rt))
475 return PTR_ERR(rt);
477 ip_rt_put(rt);
478 info->key.u.ipv4.src = fl4.saddr;
479 return 0;
482 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
483 struct net_device *dev)
485 struct ip_tunnel *tunnel = netdev_priv(dev);
486 const struct iphdr *tnl_params;
488 if (tunnel->collect_md) {
489 gre_fb_xmit(skb, dev, skb->protocol);
490 return NETDEV_TX_OK;
493 if (dev->header_ops) {
494 /* Need space for new headers */
495 if (skb_cow_head(skb, dev->needed_headroom -
496 (tunnel->hlen + sizeof(struct iphdr))))
497 goto free_skb;
499 tnl_params = (const struct iphdr *)skb->data;
501 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
502 * to gre header.
504 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
505 skb_reset_mac_header(skb);
506 } else {
507 if (skb_cow_head(skb, dev->needed_headroom))
508 goto free_skb;
510 tnl_params = &tunnel->parms.iph;
513 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
514 goto free_skb;
516 __gre_xmit(skb, dev, tnl_params, skb->protocol);
517 return NETDEV_TX_OK;
519 free_skb:
520 kfree_skb(skb);
521 dev->stats.tx_dropped++;
522 return NETDEV_TX_OK;
525 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
526 struct net_device *dev)
528 struct ip_tunnel *tunnel = netdev_priv(dev);
530 if (tunnel->collect_md) {
531 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
532 return NETDEV_TX_OK;
535 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
536 goto free_skb;
538 if (skb_cow_head(skb, dev->needed_headroom))
539 goto free_skb;
541 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
542 return NETDEV_TX_OK;
544 free_skb:
545 kfree_skb(skb);
546 dev->stats.tx_dropped++;
547 return NETDEV_TX_OK;
550 static int ipgre_tunnel_ioctl(struct net_device *dev,
551 struct ifreq *ifr, int cmd)
553 int err;
554 struct ip_tunnel_parm p;
556 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
557 return -EFAULT;
558 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
559 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
560 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
561 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
562 return -EINVAL;
564 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
565 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
567 err = ip_tunnel_ioctl(dev, &p, cmd);
568 if (err)
569 return err;
571 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
572 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
574 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
575 return -EFAULT;
576 return 0;
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
606 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
607 unsigned short type,
608 const void *daddr, const void *saddr, unsigned int len)
610 struct ip_tunnel *t = netdev_priv(dev);
611 struct iphdr *iph;
612 struct gre_base_hdr *greh;
614 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
615 greh = (struct gre_base_hdr *)(iph+1);
616 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
617 greh->protocol = htons(type);
619 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
621 /* Set the source hardware address. */
622 if (saddr)
623 memcpy(&iph->saddr, saddr, 4);
624 if (daddr)
625 memcpy(&iph->daddr, daddr, 4);
626 if (iph->daddr)
627 return t->hlen + sizeof(*iph);
629 return -(t->hlen + sizeof(*iph));
632 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
634 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
635 memcpy(haddr, &iph->saddr, 4);
636 return 4;
639 static const struct header_ops ipgre_header_ops = {
640 .create = ipgre_header,
641 .parse = ipgre_header_parse,
644 #ifdef CONFIG_NET_IPGRE_BROADCAST
645 static int ipgre_open(struct net_device *dev)
647 struct ip_tunnel *t = netdev_priv(dev);
649 if (ipv4_is_multicast(t->parms.iph.daddr)) {
650 struct flowi4 fl4;
651 struct rtable *rt;
653 rt = ip_route_output_gre(t->net, &fl4,
654 t->parms.iph.daddr,
655 t->parms.iph.saddr,
656 t->parms.o_key,
657 RT_TOS(t->parms.iph.tos),
658 t->parms.link);
659 if (IS_ERR(rt))
660 return -EADDRNOTAVAIL;
661 dev = rt->dst.dev;
662 ip_rt_put(rt);
663 if (!__in_dev_get_rtnl(dev))
664 return -EADDRNOTAVAIL;
665 t->mlink = dev->ifindex;
666 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
668 return 0;
671 static int ipgre_close(struct net_device *dev)
673 struct ip_tunnel *t = netdev_priv(dev);
675 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
676 struct in_device *in_dev;
677 in_dev = inetdev_by_index(t->net, t->mlink);
678 if (in_dev)
679 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
681 return 0;
683 #endif
685 static const struct net_device_ops ipgre_netdev_ops = {
686 .ndo_init = ipgre_tunnel_init,
687 .ndo_uninit = ip_tunnel_uninit,
688 #ifdef CONFIG_NET_IPGRE_BROADCAST
689 .ndo_open = ipgre_open,
690 .ndo_stop = ipgre_close,
691 #endif
692 .ndo_start_xmit = ipgre_xmit,
693 .ndo_do_ioctl = ipgre_tunnel_ioctl,
694 .ndo_change_mtu = ip_tunnel_change_mtu,
695 .ndo_get_stats64 = ip_tunnel_get_stats64,
696 .ndo_get_iflink = ip_tunnel_get_iflink,
/* Feature bits that __gre_tunnel_init() sets on every GRE device. */
#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)
704 static void ipgre_tunnel_setup(struct net_device *dev)
706 dev->netdev_ops = &ipgre_netdev_ops;
707 dev->type = ARPHRD_IPGRE;
708 ip_tunnel_setup(dev, ipgre_net_id);
711 static void __gre_tunnel_init(struct net_device *dev)
713 struct ip_tunnel *tunnel;
714 int t_hlen;
716 tunnel = netdev_priv(dev);
717 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
718 tunnel->parms.iph.protocol = IPPROTO_GRE;
720 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
722 t_hlen = tunnel->hlen + sizeof(struct iphdr);
724 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
725 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
727 dev->features |= GRE_FEATURES;
728 dev->hw_features |= GRE_FEATURES;
730 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
731 /* TCP offload with GRE SEQ is not supported, nor
732 * can we support 2 levels of outer headers requiring
733 * an update.
735 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
736 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
737 dev->features |= NETIF_F_GSO_SOFTWARE;
738 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
741 /* Can use a lockless transmit, unless we generate
742 * output sequences
744 dev->features |= NETIF_F_LLTX;
748 static int ipgre_tunnel_init(struct net_device *dev)
750 struct ip_tunnel *tunnel = netdev_priv(dev);
751 struct iphdr *iph = &tunnel->parms.iph;
753 __gre_tunnel_init(dev);
755 memcpy(dev->dev_addr, &iph->saddr, 4);
756 memcpy(dev->broadcast, &iph->daddr, 4);
758 dev->flags = IFF_NOARP;
759 netif_keep_dst(dev);
760 dev->addr_len = 4;
762 if (iph->daddr && !tunnel->collect_md) {
763 #ifdef CONFIG_NET_IPGRE_BROADCAST
764 if (ipv4_is_multicast(iph->daddr)) {
765 if (!iph->saddr)
766 return -EINVAL;
767 dev->flags = IFF_BROADCAST;
768 dev->header_ops = &ipgre_header_ops;
770 #endif
771 } else if (!tunnel->collect_md) {
772 dev->header_ops = &ipgre_header_ops;
775 return ip_tunnel_init(dev);
778 static const struct gre_protocol ipgre_protocol = {
779 .handler = gre_rcv,
780 .err_handler = gre_err,
783 static int __net_init ipgre_init_net(struct net *net)
785 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
788 static void __net_exit ipgre_exit_net(struct net *net)
790 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
791 ip_tunnel_delete_net(itn, &ipgre_link_ops);
794 static struct pernet_operations ipgre_net_ops = {
795 .init = ipgre_init_net,
796 .exit = ipgre_exit_net,
797 .id = &ipgre_net_id,
798 .size = sizeof(struct ip_tunnel_net),
801 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
803 __be16 flags;
805 if (!data)
806 return 0;
808 flags = 0;
809 if (data[IFLA_GRE_IFLAGS])
810 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
811 if (data[IFLA_GRE_OFLAGS])
812 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
813 if (flags & (GRE_VERSION|GRE_ROUTING))
814 return -EINVAL;
816 if (data[IFLA_GRE_COLLECT_METADATA] &&
817 data[IFLA_GRE_ENCAP_TYPE] &&
818 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
819 return -EINVAL;
821 return 0;
824 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
826 __be32 daddr;
828 if (tb[IFLA_ADDRESS]) {
829 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
830 return -EINVAL;
831 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
832 return -EADDRNOTAVAIL;
835 if (!data)
836 goto out;
838 if (data[IFLA_GRE_REMOTE]) {
839 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
840 if (!daddr)
841 return -EINVAL;
844 out:
845 return ipgre_tunnel_validate(tb, data);
848 static int ipgre_netlink_parms(struct net_device *dev,
849 struct nlattr *data[],
850 struct nlattr *tb[],
851 struct ip_tunnel_parm *parms)
853 struct ip_tunnel *t = netdev_priv(dev);
855 memset(parms, 0, sizeof(*parms));
857 parms->iph.protocol = IPPROTO_GRE;
859 if (!data)
860 return 0;
862 if (data[IFLA_GRE_LINK])
863 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
865 if (data[IFLA_GRE_IFLAGS])
866 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
868 if (data[IFLA_GRE_OFLAGS])
869 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
871 if (data[IFLA_GRE_IKEY])
872 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
874 if (data[IFLA_GRE_OKEY])
875 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
877 if (data[IFLA_GRE_LOCAL])
878 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
880 if (data[IFLA_GRE_REMOTE])
881 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
883 if (data[IFLA_GRE_TTL])
884 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
886 if (data[IFLA_GRE_TOS])
887 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
889 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
890 if (t->ignore_df)
891 return -EINVAL;
892 parms->iph.frag_off = htons(IP_DF);
895 if (data[IFLA_GRE_COLLECT_METADATA]) {
896 t->collect_md = true;
897 if (dev->type == ARPHRD_IPGRE)
898 dev->type = ARPHRD_NONE;
901 if (data[IFLA_GRE_IGNORE_DF]) {
902 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
903 && (parms->iph.frag_off & htons(IP_DF)))
904 return -EINVAL;
905 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
908 return 0;
911 /* This function returns true when ENCAP attributes are present in the nl msg */
912 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
913 struct ip_tunnel_encap *ipencap)
915 bool ret = false;
917 memset(ipencap, 0, sizeof(*ipencap));
919 if (!data)
920 return ret;
922 if (data[IFLA_GRE_ENCAP_TYPE]) {
923 ret = true;
924 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
927 if (data[IFLA_GRE_ENCAP_FLAGS]) {
928 ret = true;
929 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
932 if (data[IFLA_GRE_ENCAP_SPORT]) {
933 ret = true;
934 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
937 if (data[IFLA_GRE_ENCAP_DPORT]) {
938 ret = true;
939 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
942 return ret;
945 static int gre_tap_init(struct net_device *dev)
947 __gre_tunnel_init(dev);
948 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
950 return ip_tunnel_init(dev);
953 static const struct net_device_ops gre_tap_netdev_ops = {
954 .ndo_init = gre_tap_init,
955 .ndo_uninit = ip_tunnel_uninit,
956 .ndo_start_xmit = gre_tap_xmit,
957 .ndo_set_mac_address = eth_mac_addr,
958 .ndo_validate_addr = eth_validate_addr,
959 .ndo_change_mtu = ip_tunnel_change_mtu,
960 .ndo_get_stats64 = ip_tunnel_get_stats64,
961 .ndo_get_iflink = ip_tunnel_get_iflink,
962 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
965 static void ipgre_tap_setup(struct net_device *dev)
967 ether_setup(dev);
968 dev->netdev_ops = &gre_tap_netdev_ops;
969 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
970 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
971 ip_tunnel_setup(dev, gre_tap_net_id);
974 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
975 struct nlattr *tb[], struct nlattr *data[])
977 struct ip_tunnel_parm p;
978 struct ip_tunnel_encap ipencap;
979 int err;
981 if (ipgre_netlink_encap_parms(data, &ipencap)) {
982 struct ip_tunnel *t = netdev_priv(dev);
983 err = ip_tunnel_encap_setup(t, &ipencap);
985 if (err < 0)
986 return err;
989 err = ipgre_netlink_parms(dev, data, tb, &p);
990 if (err < 0)
991 return err;
992 return ip_tunnel_newlink(dev, tb, &p);
995 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
996 struct nlattr *data[])
998 struct ip_tunnel_parm p;
999 struct ip_tunnel_encap ipencap;
1000 int err;
1002 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1003 struct ip_tunnel *t = netdev_priv(dev);
1004 err = ip_tunnel_encap_setup(t, &ipencap);
1006 if (err < 0)
1007 return err;
1010 err = ipgre_netlink_parms(dev, data, tb, &p);
1011 if (err < 0)
1012 return err;
1013 return ip_tunnel_changelink(dev, tb, &p);
1016 static size_t ipgre_get_size(const struct net_device *dev)
1018 return
1019 /* IFLA_GRE_LINK */
1020 nla_total_size(4) +
1021 /* IFLA_GRE_IFLAGS */
1022 nla_total_size(2) +
1023 /* IFLA_GRE_OFLAGS */
1024 nla_total_size(2) +
1025 /* IFLA_GRE_IKEY */
1026 nla_total_size(4) +
1027 /* IFLA_GRE_OKEY */
1028 nla_total_size(4) +
1029 /* IFLA_GRE_LOCAL */
1030 nla_total_size(4) +
1031 /* IFLA_GRE_REMOTE */
1032 nla_total_size(4) +
1033 /* IFLA_GRE_TTL */
1034 nla_total_size(1) +
1035 /* IFLA_GRE_TOS */
1036 nla_total_size(1) +
1037 /* IFLA_GRE_PMTUDISC */
1038 nla_total_size(1) +
1039 /* IFLA_GRE_ENCAP_TYPE */
1040 nla_total_size(2) +
1041 /* IFLA_GRE_ENCAP_FLAGS */
1042 nla_total_size(2) +
1043 /* IFLA_GRE_ENCAP_SPORT */
1044 nla_total_size(2) +
1045 /* IFLA_GRE_ENCAP_DPORT */
1046 nla_total_size(2) +
1047 /* IFLA_GRE_COLLECT_METADATA */
1048 nla_total_size(0) +
1049 /* IFLA_GRE_IGNORE_DF */
1050 nla_total_size(1) +
1054 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1056 struct ip_tunnel *t = netdev_priv(dev);
1057 struct ip_tunnel_parm *p = &t->parms;
1059 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1060 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1061 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1062 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1063 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1064 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1065 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1066 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1067 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1068 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1069 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1070 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1071 !!(p->iph.frag_off & htons(IP_DF))))
1072 goto nla_put_failure;
1074 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1075 t->encap.type) ||
1076 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1077 t->encap.sport) ||
1078 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1079 t->encap.dport) ||
1080 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1081 t->encap.flags))
1082 goto nla_put_failure;
1084 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1085 goto nla_put_failure;
1087 if (t->collect_md) {
1088 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1089 goto nla_put_failure;
1092 return 0;
1094 nla_put_failure:
1095 return -EMSGSIZE;
1098 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1099 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1100 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1101 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1102 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1103 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1104 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1105 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1106 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1107 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1108 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1109 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1110 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1111 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1112 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
1113 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
1114 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
1117 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1118 .kind = "gre",
1119 .maxtype = IFLA_GRE_MAX,
1120 .policy = ipgre_policy,
1121 .priv_size = sizeof(struct ip_tunnel),
1122 .setup = ipgre_tunnel_setup,
1123 .validate = ipgre_tunnel_validate,
1124 .newlink = ipgre_newlink,
1125 .changelink = ipgre_changelink,
1126 .dellink = ip_tunnel_dellink,
1127 .get_size = ipgre_get_size,
1128 .fill_info = ipgre_fill_info,
1129 .get_link_net = ip_tunnel_get_link_net,
1132 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1133 .kind = "gretap",
1134 .maxtype = IFLA_GRE_MAX,
1135 .policy = ipgre_policy,
1136 .priv_size = sizeof(struct ip_tunnel),
1137 .setup = ipgre_tap_setup,
1138 .validate = ipgre_tap_validate,
1139 .newlink = ipgre_newlink,
1140 .changelink = ipgre_changelink,
1141 .dellink = ip_tunnel_dellink,
1142 .get_size = ipgre_get_size,
1143 .fill_info = ipgre_fill_info,
1144 .get_link_net = ip_tunnel_get_link_net,
1147 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1148 u8 name_assign_type)
1150 struct nlattr *tb[IFLA_MAX + 1];
1151 struct net_device *dev;
1152 LIST_HEAD(list_kill);
1153 struct ip_tunnel *t;
1154 int err;
1156 memset(&tb, 0, sizeof(tb));
1158 dev = rtnl_create_link(net, name, name_assign_type,
1159 &ipgre_tap_ops, tb);
1160 if (IS_ERR(dev))
1161 return dev;
1163 /* Configure flow based GRE device. */
1164 t = netdev_priv(dev);
1165 t->collect_md = true;
1167 err = ipgre_newlink(net, dev, tb, NULL);
1168 if (err < 0) {
1169 free_netdev(dev);
1170 return ERR_PTR(err);
1173 /* openvswitch users expect packet sizes to be unrestricted,
1174 * so set the largest MTU we can.
1176 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1177 if (err)
1178 goto out;
1180 err = rtnl_configure_link(dev, NULL);
1181 if (err < 0)
1182 goto out;
1184 return dev;
1185 out:
1186 ip_tunnel_dellink(dev, &list_kill);
1187 unregister_netdevice_many(&list_kill);
1188 return ERR_PTR(err);
1190 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1192 static int __net_init ipgre_tap_init_net(struct net *net)
1194 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1197 static void __net_exit ipgre_tap_exit_net(struct net *net)
1199 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1200 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1203 static struct pernet_operations ipgre_tap_net_ops = {
1204 .init = ipgre_tap_init_net,
1205 .exit = ipgre_tap_exit_net,
1206 .id = &gre_tap_net_id,
1207 .size = sizeof(struct ip_tunnel_net),
1210 static int __init ipgre_init(void)
1212 int err;
1214 pr_info("GRE over IPv4 tunneling driver\n");
1216 err = register_pernet_device(&ipgre_net_ops);
1217 if (err < 0)
1218 return err;
1220 err = register_pernet_device(&ipgre_tap_net_ops);
1221 if (err < 0)
1222 goto pnet_tap_faied;
1224 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1225 if (err < 0) {
1226 pr_info("%s: can't add protocol\n", __func__);
1227 goto add_proto_failed;
1230 err = rtnl_link_register(&ipgre_link_ops);
1231 if (err < 0)
1232 goto rtnl_link_failed;
1234 err = rtnl_link_register(&ipgre_tap_ops);
1235 if (err < 0)
1236 goto tap_ops_failed;
1238 return 0;
1240 tap_ops_failed:
1241 rtnl_link_unregister(&ipgre_link_ops);
1242 rtnl_link_failed:
1243 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1244 add_proto_failed:
1245 unregister_pernet_device(&ipgre_tap_net_ops);
1246 pnet_tap_faied:
1247 unregister_pernet_device(&ipgre_net_ops);
1248 return err;
1251 static void __exit ipgre_fini(void)
1253 rtnl_link_unregister(&ipgre_tap_ops);
1254 rtnl_link_unregister(&ipgre_link_ops);
1255 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1256 unregister_pernet_device(&ipgre_tap_net_ops);
1257 unregister_pernet_device(&ipgre_net_ops);
1260 module_init(ipgre_init);
1261 module_exit(ipgre_fini);
1262 MODULE_LICENSE("GPL");
1263 MODULE_ALIAS_RTNL_LINK("gre");
1264 MODULE_ALIAS_RTNL_LINK("gretap");
1265 MODULE_ALIAS_NETDEV("gre0");
1266 MODULE_ALIAS_NETDEV("gretap0");