/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/protocol.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
/*
   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; once we enter the first ndo_start_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output would be
     even more informative. This idea appeared to be wrong: only Linux
     complies with RFC 1812 now (yes, guys, Linux is the only true router
     now :-)); all other routers (at least in my neighbourhood) return
     only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. This is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution
   at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value < 68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF even when the encapsulated packet has DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it was your gated that injected the
   fatal route into the network, even if it was you who configured the
   fatal static route: you are innocent. :-)

   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not obvious how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally
   modular. We could extract the common parts (hash table, ioctl etc.)
   into a separate module (ip_tunnel.c).
 */
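/*
 * A minimal sketch of the xmit_recursion idea described above, for
 * illustration only (not compiled into the driver): a percpu depth
 * counter that drops the packet once the nesting limit is reached.
 * RECURSION_LIMIT and sketch_do_hard_xmit() are hypothetical names
 * chosen for this sketch, not the kernel's own symbols.
 */
#if 0
#define RECURSION_LIMIT 10

static DEFINE_PER_CPU(int, sketch_xmit_recursion);

static int sketch_dev_xmit(struct sk_buff *skb)
{
	int ret;

	/* The xmit path runs with preemption disabled, so this percpu
	 * counter is stable across the whole nested call chain. */
	if (__this_cpu_read(sketch_xmit_recursion) >= RECURSION_LIMIT) {
		/* Dead loop: a tunnel is (indirectly) its own egress. */
		kfree_skb(skb);
		return -ELOOP;
	}

	__this_cpu_inc(sketch_xmit_recursion);
	ret = sketch_do_hard_xmit(skb);	/* may re-enter sketch_dev_xmit() */
	__this_cpu_dec(sketch_xmit_recursion);

	return ret;
}
#endif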
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);
/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE  16

static int ipgre_net_id __read_mostly;
struct ipgre_net {
	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};
/* Tunnel hash table */

/*
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if they do not match a configured keyless tunnel,
   will match the fallback tunnel.
 */

#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
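/*
 * HASH() folds the low two nibbles of its argument into one of the 16
 * buckets; e.g. for a value whose low byte is 0x73, (0x3 ^ 0x7) & 0xF
 * yields bucket 4.  A standalone sketch of the same arithmetic,
 * illustrative only and not compiled into the driver:
 */
#if 0
static unsigned int sketch_hash(u32 addr)
{
	return (addr ^ (addr >> 4)) & 0xF;
}
#endif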
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64	rx_packets;
	u64	rx_bytes;
	u64	tx_packets;
	u64	tx_bytes;
	struct u64_stats_sync	syncp;
};
static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
						   struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;
	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_errors = dev->stats.rx_errors;
	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	return tot;
}
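/*
 * The do/while loop above is the reader side of a u64_stats_sync
 * seqcount: if a writer (the rx/tx path between u64_stats_update_begin()
 * and u64_stats_update_end()) races with the read, fetch_retry reports a
 * torn read and we simply reread.  A minimal illustrative reader/writer
 * pair over a counter struct of our own (not compiled into the driver):
 */
#if 0
struct sketch_counters {
	u64 packets;
	struct u64_stats_sync syncp;
};

static void sketch_add_packet(struct sketch_counters *c)
{
	u64_stats_update_begin(&c->syncp);	/* writer: bump seqcount */
	c->packets++;
	u64_stats_update_end(&c->syncp);
}

static u64 sketch_read_packets(struct sketch_counters *c)
{
	unsigned int start;
	u64 val;

	do {
		start = u64_stats_fetch_begin_bh(&c->syncp);
		val = c->packets;
	} while (u64_stats_fetch_retry_bh(&c->syncp, start));

	return val;
}
#endif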
/* Given src, dst and key, find appropriate for input tunnel. */

static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
					     __be32 remote, __be32 local,
					     __be32 key, __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int score, cand_score = 4;

	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}
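/*
 * Note on the lookup above: the four loops scan the (remote,local),
 * (remote,*), (*,local) and (*,*) tables in order of decreasing
 * specificity.  score is a two-bit mismatch mask (bit 0: ifindex
 * differs, bit 1: device type differs); an exact match (score == 0)
 * returns immediately, otherwise the lowest-scoring candidate seen in
 * any table wins, and the fallback gre0 device catches everything else.
 */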
static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
					       struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned int h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &ign->tunnels[prio][h];
}

static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
						    struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipgre_bucket(ign, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
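/*
 * Note: both helpers above run under RTNL, so writers are serialized and
 * the chain can be edited in place; rcu_assign_pointer() supplies the
 * publish barrier that lets lockless readers walking the list via
 * for_each_ip_tunnel_rcu() observe a consistent chain at any point
 * during an update.
 */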
static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t;
	struct ip_tunnel __rcu **tp;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (tp = __ipgre_bucket(ign, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next)
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;

	return t;
}
static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
					     struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "gre%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key into the third word
   of the GRE header. It makes it impossible to maintain even soft
   state for keyed GRE tunnels with checksum enabled. Tell them
   "thank you".

   Well, I wonder, rfc1812 was written by a Cisco employee; why the
   hell do these idiots break the standards they established
   themselves???
 */

	const struct iphdr *iph = (const struct iphdr *)skb->data;
	__be16	      *p = (__be16 *)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;

		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
			      IPPROTO_GRE, 0);
		return;
	}
	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}
static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
			IP_ECN_set_ce(ip_hdr(skb));
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
	}
}
static inline u8
ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;

	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}
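/*
 * ECN propagation note (a summary of the two helpers above, not new
 * behaviour): on decapsulation a CE mark on the outer header is copied
 * into the inner packet, and on encapsulation INET_ECN_encapsulate()
 * combines the configured outer tos with the inner ECN field, so
 * congestion marks survive the tunnel in both directions.
 */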
static int ipgre_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;
	__be16 gre_proto;

	if (!pskb_may_pull(skb, 16))
		goto drop;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct pcpu_tstats *tstats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (rt_is_output_route(skb_rtable(skb)))
				goto drop;
			tunnel->dev->stats.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		__skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);

		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	kfree_skb(skb);
	return 0;
}
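/*
 * For reference, the optional GRE fields parsed above appear on the wire
 * in RFC 1701 order after the 4-byte base header (flags + protocol):
 * checksum (GRE_CSUM), then key (GRE_KEY), then sequence number
 * (GRE_SEQ), each occupying 4 bytes and present only when its flag is
 * set -- which is why offset starts at 4 and grows by 4 per flag seen.
 */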
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	const struct iphdr  *old_iph = ip_hdr(skb);
	const struct iphdr  *tiph;
	struct flowi4 fl4;
	u8     tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    skb_checksum_help(skb))
		goto tx_error;

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		gre_hlen = 0;
		tiph = (const struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, old_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos == 1) {
		tos = 0;
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
	}

	rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
				 tunnel->parms.o_key, RT_TOS(tos),
				 tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (max_headroom > dev->needed_headroom)
			dev->needed_headroom = max_headroom;
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb_reset_transport_header(skb);
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph			= ip_hdr(skb);
	iph->version		= 4;
	iph->ihl		= sizeof(struct iphdr) >> 2;
	iph->frag_off		= df;
	iph->protocol		= IPPROTO_GRE;
	iph->tos		= ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		= fl4.daddr;
	iph->saddr		= fl4.saddr;

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = ip4_dst_hoplimit(&rt->dst);
	}

	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
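/*
 * Note on the option writer in the transmit path above: ptr starts at
 * the last 32-bit word of the GRE header and walks backwards (sequence,
 * then key, then checksum), which reproduces the forward on-wire order
 * checksum/key/sequence without tracking a separate offset for each
 * flag combination.
 */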
static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 iph->daddr, iph->saddr,
					 tunnel->parms.o_key,
					 RT_TOS(iph->tos),
					 tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;

	return mtu;
}
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so that I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp fec0:6666:6666::193.233.7.65
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16 *)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen;

	return -t->hlen;
}
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}

#endif
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};
static void ipgre_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
*dev
)
1349 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
1350 struct iphdr
*iph
= &tunnel
->parms
.iph
;
1353 strcpy(tunnel
->parms
.name
, dev
->name
);
1356 iph
->protocol
= IPPROTO_GRE
;
1358 tunnel
->hlen
= sizeof(struct iphdr
) + 4;
static const struct gre_protocol ipgre_protocol = {
	.handler	= ipgre_rcv,
	.err_handler	= ipgre_err,
};
static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ign->tunnels[prio][h]);

			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}
static int __net_init ipgre_init_net(struct net *net)
{
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int err;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					  ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	rcu_assign_pointer(ign->tunnels_wc[0],
			   netdev_priv(ign->fb_tunnel_dev));
	return 0;

err_reg_dev:
	ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
	return err;
}
static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;
	LIST_HEAD(list);

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ipgre_net),
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	return 0;
}
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}
static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
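/*
 * These attributes map onto the iproute2 parameters; e.g. a command such
 * as
 *	ip link add gre1 type gre remote 192.0.2.1 local 198.51.100.2 ttl 64 key 42
 * (names and addresses illustrative) populates IFLA_GRE_REMOTE,
 * IFLA_GRE_LOCAL and IFLA_GRE_TTL, while `key` sets both IFLA_GRE_IKEY
 * and IFLA_GRE_OKEY along with the corresponding GRE_KEY flag bits.
 */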
static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};
static void ipgre_tap_setup(struct net_device *dev)
{

	ether_setup(dev);

	dev->netdev_ops		= &ipgre_tap_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->iflink		= 0;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p.iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p.iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}

		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
/*
 *	And now the modules code and kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

out:
	return err;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&ipgre_net_ops);
	goto out;
}
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
		pr_info("%s: can't remove protocol\n", __func__);
	unregister_pernet_device(&ipgre_net_ops);
}
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");