2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <asm/uaccess.h>
19 #include <linux/skbuff.h>
20 #include <linux/netdevice.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/if_arp.h>
25 #include <linux/mroute.h>
26 #include <linux/init.h>
27 #include <linux/in6.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
34 #include <net/protocol.h>
37 #include <net/checksum.h>
41 #include <net/ip6_fib.h>
42 #include <net/ip6_route.h>
49 1. The most important issue is detecting local dead loops.
50 They would cause complete host lockup in transmit, which
51 would be "resolved" by stack overflow or, if queueing is enabled,
52 with infinite looping in net_bh.
54 We cannot track such dead loops during route installation,
55 it is infeasible task. The most general solutions would be
56 to keep skb->encapsulation counter (sort of local ttl),
57 and silently drop packet when it expires. It is the best
58 solution, but it supposes maintaining a new variable in ALL
59 skb, even if no tunneling is used.
61 Current solution: t->recursion lock breaks dead loops. It looks
62 like dev->tbusy flag, but I preferred new variable, because
63 the semantics is different. One day, when hard_start_xmit
64 will be multithreaded we will have to use skb->encapsulation.
68 2. Networking dead loops would not kill routers, but would really
69 kill network. IP hop limit plays role of "t->recursion" in this case,
70 if we copy it from packet being encapsulated to upper header.
71 It is very good solution, but it introduces two problems:
73 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74 do not work over tunnels.
75 - traceroute does not work. I planned to relay ICMP from tunnel,
76 so that this problem would be solved and traceroute output
77 would be even more informative. This idea appeared to be wrong:
78 only Linux complies to rfc1812 now (yes, guys, Linux is the only
79 true router now :-)), all routers (at least, in neighbourhood of mine)
80 return only 8 bytes of payload. It is the end.
82 Hence, if we want that OSPF worked or traceroute said something reasonable,
83 we should search for another solution.
85 One of them is to parse packet trying to detect inner encapsulation
86 made by our node. It is difficult or even impossible, especially,
87 taking into account fragmentation. To be short, it is not a solution at all.
89 Current solution: The solution was UNEXPECTEDLY SIMPLE.
90 We force DF flag on tunnels with preconfigured hop limit,
91 that is ALL. :-) Well, it does not remove the problem completely,
92 but exponential growth of network traffic is changed to linear
93 (branches, that exceed pmtu are pruned) and tunnel mtu
94 quickly degrades to a value <68, where looping stops.
95 Yes, it is not good if there exists a router in the loop,
96 which does not force DF, even when encapsulating packets have DF set.
97 But it is not our problem! Nobody could accuse us, we made
98 all that we could make. Even if it is your gated who injected
99 fatal route to network, even if it were you who configured
100 fatal static route: you are innocent. :-)
104 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
105 practically identical code. It would be good to glue them
106 together, but it is not very evident, how to make them modular.
107 sit is integral part of IPv6, ipip and gre are naturally modular.
108 We could extract common parts (hash table, ioctl etc)
109 to a separate module (ip_tunnel.c).
114 static int ipgre_tunnel_init(struct net_device
*dev
);
116 /* Fallback tunnel: no source, no destination, no key, no options */
118 static int ipgre_fb_tunnel_init(struct net_device
*dev
);
120 static struct net_device ipgre_fb_tunnel_dev
= {
121 NULL
, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL
, ipgre_fb_tunnel_init
,
124 static struct ip_tunnel ipgre_fb_tunnel
= {
125 NULL
, &ipgre_fb_tunnel_dev
, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
128 /* Tunnel hash table */
138 We require exact key match i.e. if a key is present in packet
139 it will match only tunnel with the same key; if it is not present,
140 it will match only keyless tunnel.
142 All keyless packets, if they do not match configured keyless tunnels,
143 will match fallback tunnel.
/* Hash an IPv4 address (or GRE key, both host-order u32) into one of the
 * 16 (HASH_SIZE) per-priority tunnel chains by folding the two low nibbles.
 * The parameter is fully parenthesized so an expression argument such as
 * HASH(a + b) expands correctly; the original form expanded the raw tokens
 * and mis-associated `+`/`^`/`>>` for non-trivial arguments. All visible
 * call sites pass plain variables, so their expansion is unchanged. */
#define HASH(addr) (((addr)^((addr)>>4))&0xF)
149 static struct ip_tunnel
*tunnels
[4][HASH_SIZE
];
151 #define tunnels_r_l (tunnels[3])
152 #define tunnels_r (tunnels[2])
153 #define tunnels_l (tunnels[1])
154 #define tunnels_wc (tunnels[0])
156 static rwlock_t ipgre_lock
= RW_LOCK_UNLOCKED
;
158 /* Given src, dst and key, find the appropriate input tunnel. */
160 static struct ip_tunnel
* ipgre_tunnel_lookup(u32 remote
, u32 local
, u32 key
)
162 unsigned h0
= HASH(remote
);
163 unsigned h1
= HASH(key
);
166 for (t
= tunnels_r_l
[h0
^h1
]; t
; t
= t
->next
) {
167 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
168 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
172 for (t
= tunnels_r
[h0
^h1
]; t
; t
= t
->next
) {
173 if (remote
== t
->parms
.iph
.daddr
) {
174 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
178 for (t
= tunnels_l
[h1
]; t
; t
= t
->next
) {
179 if (local
== t
->parms
.iph
.saddr
||
180 (local
== t
->parms
.iph
.daddr
&& MULTICAST(local
))) {
181 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
185 for (t
= tunnels_wc
[h1
]; t
; t
= t
->next
) {
186 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
189 if (ipgre_fb_tunnel_dev
.flags
&IFF_UP
)
190 return &ipgre_fb_tunnel
;
/* ipgre_bucket: return the address of the hash-chain head (&tunnels[prio][h])
 * that tunnel t belongs to, keyed on its configured remote/local addresses
 * and i_key. NOTE(review): interior lines of this function are missing from
 * this extraction (notably the branches computing `prio` and the alternative
 * hash for the local-address case) — the fragment below is kept verbatim. */
194 static struct ip_tunnel
**ipgre_bucket(struct ip_tunnel
*t
)
196 u32 remote
= t
->parms
.iph
.daddr
;
197 u32 local
= t
->parms
.iph
.saddr
;
198 u32 key
= t
->parms
.i_key
;
199 unsigned h
= HASH(key
);
/* Unicast remote endpoint configured: presumably selects a remote-keyed
 * prio and folds HASH(remote) into h — TODO confirm against full source. */
204 if (remote
&& !MULTICAST(remote
)) {
209 return &tunnels
[prio
][h
];
/* ipgre_tunnel_link: insert tunnel t at the head of its hash chain, chosen
 * by ipgre_bucket(), under the ipgre_lock write lock (bottom halves
 * disabled). NOTE(review): the actual list-insertion statements between
 * write_lock_bh() and write_unlock_bh() are missing from this extraction. */
212 static void ipgre_tunnel_link(struct ip_tunnel
*t
)
214 struct ip_tunnel
**tp
= ipgre_bucket(t
);
217 write_lock_bh(&ipgre_lock
);
219 write_unlock_bh(&ipgre_lock
);
/* ipgre_tunnel_unlink: walk t's hash chain looking for the link pointer that
 * references t and splice it out under the ipgre_lock write lock.
 * NOTE(review): the match test and the unlink assignment inside the loop are
 * missing from this extraction; the fragment below is kept verbatim. */
222 static void ipgre_tunnel_unlink(struct ip_tunnel
*t
)
224 struct ip_tunnel
**tp
;
226 for (tp
= ipgre_bucket(t
); *tp
; tp
= &(*tp
)->next
) {
228 write_lock_bh(&ipgre_lock
);
230 write_unlock_bh(&ipgre_lock
);
236 static struct ip_tunnel
* ipgre_tunnel_locate(struct ip_tunnel_parm
*parms
, int create
)
238 u32 remote
= parms
->iph
.daddr
;
239 u32 local
= parms
->iph
.saddr
;
240 u32 key
= parms
->i_key
;
241 struct ip_tunnel
*t
, **tp
, *nt
;
242 struct net_device
*dev
;
243 unsigned h
= HASH(key
);
248 if (remote
&& !MULTICAST(remote
)) {
252 for (tp
= &tunnels
[prio
][h
]; (t
= *tp
) != NULL
; tp
= &t
->next
) {
253 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
254 if (key
== t
->parms
.i_key
)
262 dev
= kmalloc(sizeof(*dev
) + sizeof(*t
), GFP_KERNEL
);
267 memset(dev
, 0, sizeof(*dev
) + sizeof(*t
));
268 dev
->priv
= (void*)(dev
+1);
269 nt
= (struct ip_tunnel
*)dev
->priv
;
271 dev
->name
= nt
->parms
.name
;
272 dev
->init
= ipgre_tunnel_init
;
274 memcpy(&nt
->parms
, parms
, sizeof(*parms
));
275 if (dev
->name
[0] == 0) {
277 for (i
=1; i
<100; i
++) {
278 sprintf(dev
->name
, "gre%d", i
);
279 if (__dev_get_by_name(dev
->name
) == NULL
)
284 memcpy(parms
->name
, dev
->name
, IFNAMSIZ
);
286 if (register_netdevice(dev
) < 0)
290 ipgre_tunnel_link(nt
);
291 /* Do not decrement MOD_USE_COUNT here. */
/* ipgre_tunnel_destructor: net_device destructor hook. Dynamically created
 * tunnel devices are freed here; the statically allocated fallback device
 * (ipgre_fb_tunnel_dev) must never be freed, hence the guard.
 * NOTE(review): the body of the if (presumably kfree(dev)) is missing from
 * this extraction. */
300 static void ipgre_tunnel_destructor(struct net_device
*dev
)
302 if (dev
!= &ipgre_fb_tunnel_dev
) {
307 static void ipgre_tunnel_uninit(struct net_device
*dev
)
309 ipgre_tunnel_unlink((struct ip_tunnel
*)dev
->priv
);
314 void ipgre_err(struct sk_buff
*skb
, unsigned char *dp
, int len
)
316 #ifndef I_WISH_WORLD_WERE_PERFECT
318 /* It is not :-( All the routers (except for Linux) return only
319 8 bytes of packet payload. It means, that precise relaying of
320 ICMP in the real Internet is absolutely infeasible.
322 Moreover, Cisco "wise men" put GRE key to the third word
323 in GRE header. It makes impossible maintaining even soft state for keyed
324 GRE tunnels with enabled checksum. Tell them "thank you".
326 Well, I wonder, rfc1812 was written by Cisco employee,
327 what the hell these idiots break standards established
331 struct iphdr
*iph
= (struct iphdr
*)dp
;
332 u16
*p
= (u16
*)(dp
+(iph
->ihl
<<2));
333 int grehlen
= (iph
->ihl
<<2) + 4;
334 int type
= skb
->h
.icmph
->type
;
335 int code
= skb
->h
.icmph
->code
;
340 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
341 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
350 /* If only 8 bytes returned, keyed message will be dropped here */
356 case ICMP_PARAMETERPROB
:
359 case ICMP_DEST_UNREACH
:
362 case ICMP_PORT_UNREACH
:
363 /* Impossible event. */
365 case ICMP_FRAG_NEEDED
:
366 /* Soft state for pmtu is maintained by IP core. */
369 /* All others are translated to HOST_UNREACH.
370 rfc2003 contains "deep thoughts" about NET_UNREACH,
371 I believe they are just ether pollution. --ANK
376 case ICMP_TIME_EXCEEDED
:
377 if (code
!= ICMP_EXC_TTL
)
382 read_lock(&ipgre_lock
);
383 t
= ipgre_tunnel_lookup(iph
->daddr
, iph
->saddr
, (flags
&GRE_KEY
) ? *(((u32
*)p
) + (grehlen
>>2) - 1) : 0);
384 if (t
== NULL
|| t
->parms
.iph
.daddr
== 0 || MULTICAST(t
->parms
.iph
.daddr
))
387 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
390 if (jiffies
- t
->err_time
< IPTUNNEL_ERR_TIMEO
)
394 t
->err_time
= jiffies
;
396 read_unlock(&ipgre_lock
);
399 struct iphdr
*iph
= (struct iphdr
*)dp
;
401 u16
*p
= (u16
*)(dp
+(iph
->ihl
<<2));
402 int type
= skb
->h
.icmph
->type
;
403 int code
= skb
->h
.icmph
->code
;
408 int grehlen
= (iph
->ihl
<<2) + 4;
409 struct sk_buff
*skb2
;
412 if (p
[1] != __constant_htons(ETH_P_IP
))
416 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
417 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
426 if (len
< grehlen
+ sizeof(struct iphdr
))
428 eiph
= (struct iphdr
*)(dp
+ grehlen
);
433 case ICMP_PARAMETERPROB
:
434 if (skb
->h
.icmph
->un
.gateway
< (iph
->ihl
<<2))
437 /* So... This guy found something strange INSIDE encapsulated
438 packet. Well, he is fool, but what can we do ?
440 rel_type
= ICMP_PARAMETERPROB
;
441 rel_info
= skb
->h
.icmph
->un
.gateway
- grehlen
;
444 case ICMP_DEST_UNREACH
:
447 case ICMP_PORT_UNREACH
:
448 /* Impossible event. */
450 case ICMP_FRAG_NEEDED
:
451 /* And it is the only really necessary thing :-) */
452 rel_info
= ntohs(skb
->h
.icmph
->un
.frag
.mtu
);
453 if (rel_info
< grehlen
+68)
456 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
457 if (rel_info
> ntohs(eiph
->tot_len
))
461 /* All others are translated to HOST_UNREACH.
462 rfc2003 contains "deep thoughts" about NET_UNREACH,
463 I believe, it is just ether pollution. --ANK
465 rel_type
= ICMP_DEST_UNREACH
;
466 rel_code
= ICMP_HOST_UNREACH
;
470 case ICMP_TIME_EXCEEDED
:
471 if (code
!= ICMP_EXC_TTL
)
476 /* Prepare fake skb to feed it to icmp_send */
477 skb2
= skb_clone(skb
, GFP_ATOMIC
);
480 dst_release(skb2
->dst
);
482 skb_pull(skb2
, skb
->data
- (u8
*)eiph
);
483 skb2
->nh
.raw
= skb2
->data
;
485 /* Try to guess incoming interface */
486 if (ip_route_output(&rt
, eiph
->saddr
, 0, RT_TOS(eiph
->tos
), 0)) {
490 skb2
->dev
= rt
->u
.dst
.dev
;
492 /* route "incoming" packet */
493 if (rt
->rt_flags
&RTCF_LOCAL
) {
496 if (ip_route_output(&rt
, eiph
->daddr
, eiph
->saddr
, eiph
->tos
, 0) ||
497 rt
->u
.dst
.dev
->type
!= ARPHRD_IPGRE
) {
504 if (ip_route_input(skb2
, eiph
->daddr
, eiph
->saddr
, eiph
->tos
, skb2
->dev
) ||
505 skb2
->dst
->dev
->type
!= ARPHRD_IPGRE
) {
511 /* change mtu on this route */
512 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
513 if (rel_info
> skb2
->dst
->pmtu
) {
517 skb2
->dst
->pmtu
= rel_info
;
518 rel_info
= htonl(rel_info
);
519 } else if (type
== ICMP_TIME_EXCEEDED
) {
520 struct ip_tunnel
*t
= (struct ip_tunnel
*)skb2
->dev
->priv
;
521 if (t
->parms
.iph
.ttl
) {
522 rel_type
= ICMP_DEST_UNREACH
;
523 rel_code
= ICMP_HOST_UNREACH
;
527 icmp_send(skb2
, rel_type
, rel_code
, rel_info
);
532 int ipgre_rcv(struct sk_buff
*skb
, unsigned short len
)
534 struct iphdr
*iph
= skb
->nh
.iph
;
536 u16 flags
= *(u16
*)h
;
540 struct ip_tunnel
*tunnel
;
543 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_ROUTING
|GRE_SEQ
|GRE_VERSION
)) {
544 /* - Version must be 0.
545 - We do not support routing headers.
547 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
550 if (flags
&GRE_CSUM
) {
551 csum
= ip_compute_csum(h
, len
);
555 key
= *(u32
*)(h
+ offset
);
559 seqno
= ntohl(*(u32
*)(h
+ offset
));
564 read_lock(&ipgre_lock
);
565 if ((tunnel
= ipgre_tunnel_lookup(iph
->saddr
, iph
->daddr
, key
)) != NULL
) {
566 skb
->mac
.raw
= skb
->nh
.raw
;
567 skb
->nh
.raw
= skb_pull(skb
, h
+ offset
- skb
->data
);
568 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
570 skb
->protocol
= *(u16
*)(h
+ 2);
571 skb
->pkt_type
= PACKET_HOST
;
572 #ifdef CONFIG_NET_IPGRE_BROADCAST
573 if (MULTICAST(iph
->daddr
)) {
574 /* Looped back packet, drop it! */
575 if (((struct rtable
*)skb
->dst
)->key
.iif
== 0)
577 tunnel
->stat
.multicast
++;
578 skb
->pkt_type
= PACKET_BROADCAST
;
582 if (((flags
&GRE_CSUM
) && csum
) ||
583 (!(flags
&GRE_CSUM
) && tunnel
->parms
.i_flags
&GRE_CSUM
)) {
584 tunnel
->stat
.rx_crc_errors
++;
585 tunnel
->stat
.rx_errors
++;
588 if (tunnel
->parms
.i_flags
&GRE_SEQ
) {
589 if (!(flags
&GRE_SEQ
) ||
590 (tunnel
->i_seqno
&& (s32
)(seqno
- tunnel
->i_seqno
) < 0)) {
591 tunnel
->stat
.rx_fifo_errors
++;
592 tunnel
->stat
.rx_errors
++;
595 tunnel
->i_seqno
= seqno
+ 1;
597 tunnel
->stat
.rx_packets
++;
598 tunnel
->stat
.rx_bytes
+= skb
->len
;
599 skb
->dev
= tunnel
->dev
;
600 dst_release(skb
->dst
);
603 read_unlock(&ipgre_lock
);
606 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PROT_UNREACH
, 0);
609 read_unlock(&ipgre_lock
);
615 static int ipgre_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
617 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
618 struct net_device_stats
*stats
= &tunnel
->stat
;
619 struct iphdr
*old_iph
= skb
->nh
.iph
;
623 struct rtable
*rt
; /* Route to the other host */
624 struct net_device
*tdev
; /* Device to other host */
625 struct iphdr
*iph
; /* Our new IP header */
626 int max_headroom
; /* The extra header space needed */
631 if (tunnel
->recursion
++) {
632 tunnel
->stat
.collisions
++;
636 if (dev
->hard_header
) {
638 tiph
= (struct iphdr
*)skb
->data
;
640 gre_hlen
= tunnel
->hlen
;
641 tiph
= &tunnel
->parms
.iph
;
644 if ((dst
= tiph
->daddr
) == 0) {
647 if (skb
->dst
== NULL
) {
648 tunnel
->stat
.tx_fifo_errors
++;
652 if (skb
->protocol
== __constant_htons(ETH_P_IP
)) {
653 rt
= (struct rtable
*)skb
->dst
;
654 if ((dst
= rt
->rt_gateway
) == 0)
658 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
)) {
659 struct in6_addr
*addr6
;
661 struct neighbour
*neigh
= skb
->dst
->neighbour
;
666 addr6
= (struct in6_addr
*)&neigh
->primary_key
;
667 addr_type
= ipv6_addr_type(addr6
);
669 if (addr_type
== IPV6_ADDR_ANY
) {
670 addr6
= &skb
->nh
.ipv6h
->daddr
;
671 addr_type
= ipv6_addr_type(addr6
);
674 if ((addr_type
& IPV6_ADDR_COMPATv4
) == 0)
677 dst
= addr6
->s6_addr32
[3];
686 if (skb
->protocol
== __constant_htons(ETH_P_IP
))
691 if (ip_route_output(&rt
, dst
, tiph
->saddr
, RT_TOS(tos
), tunnel
->parms
.link
)) {
692 tunnel
->stat
.tx_carrier_errors
++;
695 tdev
= rt
->u
.dst
.dev
;
699 tunnel
->stat
.collisions
++;
704 mtu
= rt
->u
.dst
.pmtu
- tunnel
->hlen
;
706 if (skb
->protocol
== __constant_htons(ETH_P_IP
)) {
707 if (skb
->dst
&& mtu
< skb
->dst
->pmtu
&& mtu
>= 68)
708 skb
->dst
->pmtu
= mtu
;
710 df
|= (old_iph
->frag_off
&__constant_htons(IP_DF
));
712 if ((old_iph
->frag_off
&__constant_htons(IP_DF
)) &&
713 mtu
< ntohs(old_iph
->tot_len
)) {
714 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
, htonl(mtu
));
720 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
)) {
721 struct rt6_info
*rt6
= (struct rt6_info
*)skb
->dst
;
723 if (rt6
&& mtu
< rt6
->u
.dst
.pmtu
&& mtu
>= IPV6_MIN_MTU
) {
724 if ((tunnel
->parms
.iph
.daddr
&& !MULTICAST(tunnel
->parms
.iph
.daddr
)) ||
725 rt6
->rt6i_dst
.plen
== 128) {
726 rt6
->rt6i_flags
|= RTF_MODIFIED
;
727 skb
->dst
->pmtu
= mtu
;
731 if (mtu
>= IPV6_MIN_MTU
&& mtu
< skb
->len
- tunnel
->hlen
+ gre_hlen
) {
732 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
, dev
);
739 if (tunnel
->err_count
> 0) {
740 if (jiffies
- tunnel
->err_time
< IPTUNNEL_ERR_TIMEO
) {
743 dst_link_failure(skb
);
745 tunnel
->err_count
= 0;
748 skb
->h
.raw
= skb
->nh
.raw
;
750 max_headroom
= ((tdev
->hard_header_len
+15)&~15)+ gre_hlen
;
752 if (skb_headroom(skb
) < max_headroom
|| skb_cloned(skb
) || skb_shared(skb
)) {
753 struct sk_buff
*new_skb
= skb_realloc_headroom(skb
, max_headroom
);
762 skb_set_owner_w(new_skb
, skb
->sk
);
767 skb
->nh
.raw
= skb_push(skb
, gre_hlen
);
768 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
769 dst_release(skb
->dst
);
770 skb
->dst
= &rt
->u
.dst
;
773 * Push down and install the IPIP header.
778 iph
->ihl
= sizeof(struct iphdr
) >> 2;
780 iph
->protocol
= IPPROTO_GRE
;
782 iph
->daddr
= rt
->rt_dst
;
783 iph
->saddr
= rt
->rt_src
;
785 if ((iph
->ttl
= tiph
->ttl
) == 0) {
786 if (skb
->protocol
== __constant_htons(ETH_P_IP
))
787 iph
->ttl
= old_iph
->ttl
;
789 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
))
790 iph
->ttl
= ((struct ipv6hdr
*)old_iph
)->hop_limit
;
793 iph
->ttl
= ip_statistics
.IpDefaultTTL
;
796 ((u16
*)(iph
+1))[0] = tunnel
->parms
.o_flags
;
797 ((u16
*)(iph
+1))[1] = skb
->protocol
;
799 if (tunnel
->parms
.o_flags
&(GRE_KEY
|GRE_CSUM
|GRE_SEQ
)) {
800 u32
*ptr
= (u32
*)(((u8
*)iph
) + tunnel
->hlen
- 4);
802 if (tunnel
->parms
.o_flags
&GRE_SEQ
) {
804 *ptr
= htonl(tunnel
->o_seqno
);
807 if (tunnel
->parms
.o_flags
&GRE_KEY
) {
808 *ptr
= tunnel
->parms
.o_key
;
811 if (tunnel
->parms
.o_flags
&GRE_CSUM
) {
813 *(__u16
*)ptr
= ip_compute_csum((void*)(iph
+1), skb
->len
- sizeof(struct iphdr
));
817 iph
->tot_len
= htons(skb
->len
);
818 iph
->id
= htons(ip_id_count
++);
821 stats
->tx_bytes
+= skb
->len
;
828 dst_link_failure(skb
);
838 ipgre_tunnel_ioctl (struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
841 struct ip_tunnel_parm p
;
849 if (dev
== &ipgre_fb_tunnel_dev
) {
850 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
))) {
854 t
= ipgre_tunnel_locate(&p
, 0);
857 t
= (struct ip_tunnel
*)dev
->priv
;
858 memcpy(&p
, &t
->parms
, sizeof(p
));
859 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
866 if (!capable(CAP_NET_ADMIN
))
870 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
874 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_GRE
||
875 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&__constant_htons(~IP_DF
)) ||
876 ((p
.i_flags
|p
.o_flags
)&(GRE_VERSION
|GRE_ROUTING
)))
879 p
.iph
.frag_off
|= __constant_htons(IP_DF
);
881 if (!(p
.i_flags
&GRE_KEY
))
883 if (!(p
.o_flags
&GRE_KEY
))
886 t
= ipgre_tunnel_locate(&p
, cmd
== SIOCADDTUNNEL
);
888 if (dev
!= &ipgre_fb_tunnel_dev
&& cmd
== SIOCCHGTUNNEL
&&
889 t
!= &ipgre_fb_tunnel
) {
898 t
= (struct ip_tunnel
*)dev
->priv
;
900 if (MULTICAST(p
.iph
.daddr
))
901 nflags
= IFF_BROADCAST
;
902 else if (p
.iph
.daddr
)
903 nflags
= IFF_POINTOPOINT
;
905 if ((dev
->flags
^nflags
)&(IFF_POINTOPOINT
|IFF_BROADCAST
)) {
909 ipgre_tunnel_unlink(t
);
910 t
->parms
.iph
.saddr
= p
.iph
.saddr
;
911 t
->parms
.iph
.daddr
= p
.iph
.daddr
;
912 t
->parms
.i_key
= p
.i_key
;
913 t
->parms
.o_key
= p
.o_key
;
914 memcpy(dev
->dev_addr
, &p
.iph
.saddr
, 4);
915 memcpy(dev
->broadcast
, &p
.iph
.daddr
, 4);
916 ipgre_tunnel_link(t
);
917 netdev_state_change(dev
);
923 if (cmd
== SIOCCHGTUNNEL
) {
924 t
->parms
.iph
.ttl
= p
.iph
.ttl
;
925 t
->parms
.iph
.tos
= p
.iph
.tos
;
926 t
->parms
.iph
.frag_off
= p
.iph
.frag_off
;
928 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &t
->parms
, sizeof(p
)))
931 err
= (cmd
== SIOCADDTUNNEL
? -ENOBUFS
: -ENOENT
);
936 if (!capable(CAP_NET_ADMIN
))
939 if (dev
== &ipgre_fb_tunnel_dev
) {
941 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
944 if ((t
= ipgre_tunnel_locate(&p
, 0)) == NULL
)
947 if (t
== &ipgre_fb_tunnel
)
950 err
= unregister_netdevice(dev
);
/* ipgre_tunnel_get_stats: net_device get_stats hook — return the statistics
 * block embedded in the tunnel private data (dev->priv->stat).
 * NOTE(review): the enclosing braces are missing from this extraction. */
962 static struct net_device_stats
*ipgre_tunnel_get_stats(struct net_device
*dev
)
964 return &(((struct ip_tunnel
*)dev
->priv
)->stat
);
/* ipgre_tunnel_change_mtu: net_device change_mtu hook. Rejects MTUs below
 * the IPv4 minimum (68) or above 0xFFF8 minus the GRE encapsulation
 * overhead (tunnel->hlen). NOTE(review): the error return and the
 * assignment dev->mtu = new_mtu are missing from this extraction. */
967 static int ipgre_tunnel_change_mtu(struct net_device
*dev
, int new_mtu
)
969 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
970 if (new_mtu
< 68 || new_mtu
> 0xFFF8 - tunnel
->hlen
)
976 #ifdef CONFIG_NET_IPGRE_BROADCAST
977 /* Nice toy. Unfortunately, useless in real life :-)
978 It allows to construct virtual multiprotocol broadcast "LAN"
979 over the Internet, provided multicast routing is tuned.
982 I have no idea was this bicycle invented before me,
983 so that I had to set ARPHRD_IPGRE to a random value.
984 I have an impression, that Cisco could make something similar,
985 but this feature is apparently missing in IOS<=11.2(8).
987 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
988 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
990 ping -t 255 224.66.66.66
992 If nobody answers, mbone does not work.
994 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
995 ip addr add 10.66.66.<somewhat>/24 dev Universe
997 ifconfig Universe add fe80::<Your_real_addr>/10
998 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1001 ftp fec0:6666:6666::193.233.7.65
1006 static int ipgre_header(struct sk_buff
*skb
, struct net_device
*dev
, unsigned short type
,
1007 void *daddr
, void *saddr
, unsigned len
)
1009 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1010 struct iphdr
*iph
= (struct iphdr
*)skb_push(skb
, t
->hlen
);
1011 u16
*p
= (u16
*)(iph
+1);
1013 memcpy(iph
, &t
->parms
.iph
, sizeof(struct iphdr
));
1014 p
[0] = t
->parms
.o_flags
;
1018 * Set the source hardware address.
1022 memcpy(&iph
->saddr
, saddr
, 4);
1025 memcpy(&iph
->daddr
, daddr
, 4);
1028 if (iph
->daddr
&& !MULTICAST(iph
->daddr
))
1034 static int ipgre_open(struct net_device
*dev
)
1036 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1039 if (MULTICAST(t
->parms
.iph
.daddr
)) {
1041 if (ip_route_output(&rt
, t
->parms
.iph
.daddr
,
1042 t
->parms
.iph
.saddr
, RT_TOS(t
->parms
.iph
.tos
),
1045 return -EADDRNOTAVAIL
;
1047 dev
= rt
->u
.dst
.dev
;
1049 if (__in_dev_get(dev
) == NULL
) {
1051 return -EADDRNOTAVAIL
;
1053 t
->mlink
= dev
->ifindex
;
1054 ip_mc_inc_group(__in_dev_get(dev
), t
->parms
.iph
.daddr
);
1059 static int ipgre_close(struct net_device
*dev
)
1061 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1062 if (MULTICAST(t
->parms
.iph
.daddr
) && t
->mlink
) {
1063 struct in_device
*in_dev
= inetdev_by_index(t
->mlink
);
1065 ip_mc_dec_group(in_dev
, t
->parms
.iph
.daddr
);
1075 static void ipgre_tunnel_init_gen(struct net_device
*dev
)
1077 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1079 dev
->uninit
= ipgre_tunnel_uninit
;
1080 dev
->destructor
= ipgre_tunnel_destructor
;
1081 dev
->hard_start_xmit
= ipgre_tunnel_xmit
;
1082 dev
->get_stats
= ipgre_tunnel_get_stats
;
1083 dev
->do_ioctl
= ipgre_tunnel_ioctl
;
1084 dev
->change_mtu
= ipgre_tunnel_change_mtu
;
1086 dev_init_buffers(dev
);
1088 dev
->type
= ARPHRD_IPGRE
;
1089 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
) + 4;
1090 dev
->mtu
= 1500 - sizeof(struct iphdr
) - 4;
1091 dev
->flags
= IFF_NOARP
;
1094 memcpy(dev
->dev_addr
, &t
->parms
.iph
.saddr
, 4);
1095 memcpy(dev
->broadcast
, &t
->parms
.iph
.daddr
, 4);
1098 static int ipgre_tunnel_init(struct net_device
*dev
)
1100 struct net_device
*tdev
= NULL
;
1101 struct ip_tunnel
*tunnel
;
1103 int hlen
= LL_MAX_HEADER
;
1105 int addend
= sizeof(struct iphdr
) + 4;
1107 tunnel
= (struct ip_tunnel
*)dev
->priv
;
1108 iph
= &tunnel
->parms
.iph
;
1110 ipgre_tunnel_init_gen(dev
);
1112 /* Guess output device to choose reasonable mtu and hard_header_len */
1116 if (!ip_route_output(&rt
, iph
->daddr
, iph
->saddr
, RT_TOS(iph
->tos
), tunnel
->parms
.link
)) {
1117 tdev
= rt
->u
.dst
.dev
;
1121 dev
->flags
|= IFF_POINTOPOINT
;
1123 #ifdef CONFIG_NET_IPGRE_BROADCAST
1124 if (MULTICAST(iph
->daddr
)) {
1127 dev
->flags
= IFF_BROADCAST
;
1128 dev
->hard_header
= ipgre_header
;
1129 dev
->open
= ipgre_open
;
1130 dev
->stop
= ipgre_close
;
1135 if (!tdev
&& tunnel
->parms
.link
)
1136 tdev
= __dev_get_by_index(tunnel
->parms
.link
);
1139 hlen
= tdev
->hard_header_len
;
1142 dev
->iflink
= tunnel
->parms
.link
;
1144 /* Precalculate GRE options length */
1145 if (tunnel
->parms
.o_flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
)) {
1146 if (tunnel
->parms
.o_flags
&GRE_CSUM
)
1148 if (tunnel
->parms
.o_flags
&GRE_KEY
)
1150 if (tunnel
->parms
.o_flags
&GRE_SEQ
)
1153 dev
->hard_header_len
= hlen
+ addend
;
1154 dev
->mtu
= mtu
- addend
;
1155 tunnel
->hlen
= addend
;
1160 static int ipgre_fb_tunnel_open(struct net_device
*dev
)
1166 static int ipgre_fb_tunnel_close(struct net_device
*dev
)
1173 int __init
ipgre_fb_tunnel_init(struct net_device
*dev
)
1175 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
1178 ipgre_tunnel_init_gen(dev
);
1180 dev
->open
= ipgre_fb_tunnel_open
;
1181 dev
->stop
= ipgre_fb_tunnel_close
;
1184 iph
= &ipgre_fb_tunnel
.parms
.iph
;
1186 iph
->protocol
= IPPROTO_GRE
;
1188 tunnel
->hlen
= sizeof(struct iphdr
) + 4;
1191 tunnels_wc
[0] = &ipgre_fb_tunnel
;
1196 static struct inet_protocol ipgre_protocol
= {
1197 ipgre_rcv
, /* GRE handler */
1198 ipgre_err
, /* TUNNEL error control */
1200 IPPROTO_GRE
, /* protocol ID */
1208 * And now the modules code and kernel interface.
1212 int init_module(void)
1214 int __init
ipgre_init(void)
1217 printk(KERN_INFO
"GRE over IPv4 tunneling driver\n");
1219 ipgre_fb_tunnel_dev
.priv
= (void*)&ipgre_fb_tunnel
;
1220 ipgre_fb_tunnel_dev
.name
= ipgre_fb_tunnel
.parms
.name
;
1222 register_netdev(&ipgre_fb_tunnel_dev
);
1224 register_netdevice(&ipgre_fb_tunnel_dev
);
1227 inet_add_protocol(&ipgre_protocol
);
1233 void cleanup_module(void)
1235 if ( inet_del_protocol(&ipgre_protocol
) < 0 )
1236 printk(KERN_INFO
"ipgre close: can't remove protocol\n");
1238 unregister_netdev(&ipgre_fb_tunnel_dev
);