net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
   90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/config.h>
  98 #include <linux/module.h>
  99 #include <linux/types.h>
 100 #include <linux/sched.h>
 101 #include <linux/kernel.h>
 102 #include <asm/uaccess.h>
 103 #include <linux/skbuff.h>
 104 #include <linux/netdevice.h>
 105 #include <linux/in.h>
 106 #include <linux/tcp.h>
 107 #include <linux/udp.h>
 108 #include <linux/if_arp.h>
 109 #include <linux/mroute.h>
 110 #include <linux/init.h>
 111 #include <linux/netfilter_ipv4.h>
 112 #include <linux/if_ether.h>
 113
 114 #include <net/sock.h>
 115 #include <net/ip.h>
 116 #include <net/icmp.h>
 117 #include <net/protocol.h>
 118 #include <net/ipip.h>
 119 #include <net/inet_ecn.h>
 120 #include <net/xfrm.h>
 121
 122 #define HASH_SIZE  16
 123 #define HASH(addr) ((addr^(addr>>4))&0xF)
 124
  125 static int ipip_fb_tunnel_init(struct net_device *dev);
  126 static int ipip_tunnel_init(struct net_device *dev);
  127 static void ipip_tunnel_setup(struct net_device *dev);
  128
      /*
       * The fallback tunnel device.  ipip_tunnel_uninit() clears tunnels_wc[0]
       * when this device is torn down, and the ioctl handler treats requests
       * issued on it as "operate on the tunnel named by the parameters".
       */
  129 static struct net_device *ipip_fb_tunnel_dev;
  130
      /*
       * Tunnel tables, one per combination of configured endpoints:
       *   tunnels_r_l - remote and local set, bucket = HASH(remote)^HASH(local)
       *   tunnels_r   - only remote set,      bucket = HASH(remote)
       *   tunnels_l   - only local set,       bucket = HASH(local)
       *   tunnels_wc  - neither set (single wildcard slot)
       * Each bucket is a singly linked list chained through ip_tunnel.next.
       */
  131 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
  132 static struct ip_tunnel *tunnels_r[HASH_SIZE];
  133 static struct ip_tunnel *tunnels_l[HASH_SIZE];
  134 static struct ip_tunnel *tunnels_wc[1];
      /*
       * Indexed by the "prio" value built in ipip_bucket() and
       * ipip_tunnel_locate(): bit 1 (value 2) = remote set, bit 0 (value 1) =
       * local set.
       */
  135 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
  136
      /*
       * Taken for writing around the pointer updates that link/unlink a tunnel,
       * and for reading around ipip_tunnel_lookup() in ipip_rcv() and ipip_err().
       */
  137 static DEFINE_RWLOCK(ipip_lock);
 138
 139 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
 140 {
 141         unsigned h0 = HASH(remote);
 142         unsigned h1 = HASH(local);
 143         struct ip_tunnel *t;
 144
 145         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 146                 if (local == t->parms.iph.saddr &&
 147                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 148                         return t;
 149         }
 150         for (t = tunnels_r[h0]; t; t = t->next) {
 151                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 152                         return t;
 153         }
 154         for (t = tunnels_l[h1]; t; t = t->next) {
 155                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 156                         return t;
 157         }
 158         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 159                 return t;
 160         return NULL;
 161 }
 162
 163 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 164 {
 165         u32 remote = t->parms.iph.daddr;
 166         u32 local = t->parms.iph.saddr;
 167         unsigned h = 0;
 168         int prio = 0;
 169
 170         if (remote) {
 171                 prio |= 2;
 172                 h ^= HASH(remote);
 173         }
 174         if (local) {
 175                 prio |= 1;
 176                 h ^= HASH(local);
 177         }
 178         return &tunnels[prio][h];
 179 }
 180
 181
 182 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 183 {
 184         struct ip_tunnel **tp;
 185
 186         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 187                 if (t == *tp) {
 188                         write_lock_bh(&ipip_lock);
 189                         *tp = t->next;
 190                         write_unlock_bh(&ipip_lock);
 191                         break;
 192                 }
 193         }
 194 }
 195
 196 static void ipip_tunnel_link(struct ip_tunnel *t)
 197 {
 198         struct ip_tunnel **tp = ipip_bucket(t);
 199
 200         t->next = *tp;
 201         write_lock_bh(&ipip_lock);
 202         *tp = t;
 203         write_unlock_bh(&ipip_lock);
 204 }
 205
 206 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 207 {
 208         u32 remote = parms->iph.daddr;
 209         u32 local = parms->iph.saddr;
 210         struct ip_tunnel *t, **tp, *nt;
 211         struct net_device *dev;
 212         unsigned h = 0;
 213         int prio = 0;
 214         char name[IFNAMSIZ];
 215
 216         if (remote) {
 217                 prio |= 2;
 218                 h ^= HASH(remote);
 219         }
 220         if (local) {
 221                 prio |= 1;
 222                 h ^= HASH(local);
 223         }
 224         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 225                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 226                         return t;
 227         }
 228         if (!create)
 229                 return NULL;
 230
 231         if (parms->name[0])
 232                 strlcpy(name, parms->name, IFNAMSIZ);
 233         else {
 234                 int i;
 235                 for (i=1; i<100; i++) {
 236                         sprintf(name, "tunl%d", i);
 237                         if (__dev_get_by_name(name) == NULL)
 238                                 break;
 239                 }
 240                 if (i==100)
 241                         goto failed;
 242         }
 243
 244         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 245         if (dev == NULL)
 246                 return NULL;
 247
 248         nt = netdev_priv(dev);
 249         SET_MODULE_OWNER(dev);
 250         dev->init = ipip_tunnel_init;
 251         nt->parms = *parms;
 252
 253         if (register_netdevice(dev) < 0) {
 254                 free_netdev(dev);
 255                 goto failed;
 256         }
 257
 258         dev_hold(dev);
 259         ipip_tunnel_link(nt);
 260         return nt;
 261
 262 failed:
 263         return NULL;
 264 }
 265
 266 static void ipip_tunnel_uninit(struct net_device *dev)
 267 {
 268         if (dev == ipip_fb_tunnel_dev) {
 269                 write_lock_bh(&ipip_lock);
 270                 tunnels_wc[0] = NULL;
 271                 write_unlock_bh(&ipip_lock);
 272         } else
 273                 ipip_tunnel_unlink(netdev_priv(dev));
 274         dev_put(dev);
 275 }
 276
  /*
   * ICMP error handler for IPIP tunnels (installed as .err_handler of
   * ipip_handler).
   *
   * In the branch compiled by default (I_WISH_WORLD_WERE_PERFECT undefined)
   * no ICMP is relayed: the function filters on ICMP type/code, looks up the
   * tunnel matching the addresses of the IP header at skb->data, and records
   * the error in t->err_count / t->err_time.  ipip_tunnel_xmit() later reads
   * those fields and reports a link failure to senders.
   *
   * @skb:  the ICMP packet; skb->data is read as an IP header (presumably the
   *        outer header quoted in the ICMP payload - confirm with the caller)
   *        and skb->h.icmph as the ICMP header.
   * @info: not referenced anywhere in the visible body.
   *
   * NOTE(review): the #else branch refers to `dp`, `len` and `key`, none of
   * which is declared in this function, so it would not compile if
   * I_WISH_WORLD_WERE_PERFECT were ever defined.
   */
  277 static void ipip_err(struct sk_buff *skb, u32 info)
  278 {
  279 #ifndef I_WISH_WORLD_WERE_PERFECT
  280
  281 /* It is not :-( All the routers (except for Linux) return only
  282    8 bytes of packet payload. It means, that precise relaying of
  283    ICMP in the real Internet is absolutely infeasible.
  284  */
  285         struct iphdr *iph = (struct iphdr*)skb->data;
  286         int type = skb->h.icmph->type;
  287         int code = skb->h.icmph->code;
  288         struct ip_tunnel *t;
  289
              /* Only DEST_UNREACH (most codes) and TIME_EXCEEDED/TTL fall
                 through to the error accounting below; everything else
                 returns immediately. */
  290         switch (type) {
  291         default:
  292         case ICMP_PARAMETERPROB:
  293                 return;
  294
  295         case ICMP_DEST_UNREACH:
  296                 switch (code) {
  297                 case ICMP_SR_FAILED:
  298                 case ICMP_PORT_UNREACH:
  299                         /* Impossible event. */
  300                         return;
  301                 case ICMP_FRAG_NEEDED:
  302                         /* Soft state for pmtu is maintained by IP core. */
  303                         return;
  304                 default:
  305                         /* All others are translated to HOST_UNREACH.
  306                            rfc2003 contains "deep thoughts" about NET_UNREACH,
  307                            I believe they are just ether pollution. --ANK
  308                          */
  309                         break;
  310                 }
  311                 break;
  312         case ICMP_TIME_EXCEEDED:
  313                 if (code != ICMP_EXC_TTL)
  314                         return;
  315                 break;
  316         }
  317
  318         read_lock(&ipip_lock);
              /* Lookup takes (remote, local): the quoted header's destination
                 is passed as the remote, its source as the local address. */
  319         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
              /* Ignore errors for unknown tunnels and tunnels without a
                 configured remote address. */
  320         if (t == NULL || t->parms.iph.daddr == 0)
  321                 goto out;
              /* Skip TIME_EXCEEDED when the tunnel has no TTL configured. */
  322         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
  323                 goto out;
  324
              /* Count errors arriving within IPTUNNEL_ERR_TIMEO of the previous
                 one; otherwise restart the count at 1. */
  325         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
  326                 t->err_count++;
  327         else
  328                 t->err_count = 1;
  329         t->err_time = jiffies;
  330 out:
  331         read_unlock(&ipip_lock);
  332         return;
  333 #else
              /* Relay variant, not compiled by default: builds a fake skb
                 positioned at the encapsulated header and feeds it to
                 icmp_send() with a translated type/code/info. */
  334         struct iphdr *iph = (struct iphdr*)dp;
  335         int hlen = iph->ihl<<2;
  336         struct iphdr *eiph;
  337         int type = skb->h.icmph->type;
  338         int code = skb->h.icmph->code;
  339         int rel_type = 0;
  340         int rel_code = 0;
  341         int rel_info = 0;
  342         struct sk_buff *skb2;
  343         struct flowi fl;
  344         struct rtable *rt;
  345
  346         if (len < hlen + sizeof(struct iphdr))
  347                 return;
  348         eiph = (struct iphdr*)(dp + hlen);
  349
  350         switch (type) {
  351         default:
  352                 return;
  353         case ICMP_PARAMETERPROB:
  354                 if (skb->h.icmph->un.gateway < hlen)
  355                         return;
  356
  357                 /* So... This guy found something strange INSIDE encapsulated
  358                    packet. Well, he is fool, but what can we do ?
  359                  */
  360                 rel_type = ICMP_PARAMETERPROB;
  361                 rel_info = skb->h.icmph->un.gateway - hlen;
  362                 break;
  363
  364         case ICMP_DEST_UNREACH:
  365                 switch (code) {
  366                 case ICMP_SR_FAILED:
  367                 case ICMP_PORT_UNREACH:
  368                         /* Impossible event. */
  369                         return;
  370                 case ICMP_FRAG_NEEDED:
  371                         /* And it is the only really necessary thing :-) */
  372                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
  373                         if (rel_info < hlen+68)
  374                                 return;
  375                         rel_info -= hlen;
  376                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
  377                         if (rel_info > ntohs(eiph->tot_len))
  378                                 return;
  379                         break;
  380                 default:
  381                         /* All others are translated to HOST_UNREACH.
  382                            rfc2003 contains "deep thoughts" about NET_UNREACH,
  383                            I believe, it is just ether pollution. --ANK
  384                          */
  385                         rel_type = ICMP_DEST_UNREACH;
  386                         rel_code = ICMP_HOST_UNREACH;
  387                         break;
  388                 }
  389                 break;
  390         case ICMP_TIME_EXCEEDED:
  391                 if (code != ICMP_EXC_TTL)
  392                         return;
  393                 break;
  394         }
  395
  396         /* Prepare fake skb to feed it to icmp_send */
  397         skb2 = skb_clone(skb, GFP_ATOMIC);
  398         if (skb2 == NULL)
  399                 return;
  400         dst_release(skb2->dst);
  401         skb2->dst = NULL;
  402         skb_pull(skb2, skb->data - (u8*)eiph);
  403         skb2->nh.raw = skb2->data;
  404
  405         /* Try to guess incoming interface */
  406         memset(&fl, 0, sizeof(fl));
  407         fl.fl4_daddr = eiph->saddr;
  408         fl.fl4_tos = RT_TOS(eiph->tos);
  409         fl.proto = IPPROTO_IPIP;
              /* NOTE(review): `key` is not declared here; the flow filled in
                 just above is `fl` - confirm which was intended. */
  410         if (ip_route_output_key(&rt, &key)) {
  411                 kfree_skb(skb2);
  412                 return;
  413         }
  414         skb2->dev = rt->u.dst.dev;
  415
  416         /* route "incoming" packet */
  417         if (rt->rt_flags&RTCF_LOCAL) {
  418                 ip_rt_put(rt);
  419                 rt = NULL;
  420                 fl.fl4_daddr = eiph->daddr;
  421                 fl.fl4_src = eiph->saddr;
  422                 fl.fl4_tos = eiph->tos;
  423                 if (ip_route_output_key(&rt, &fl) ||
  424                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
  425                         ip_rt_put(rt);
  426                         kfree_skb(skb2);
  427                         return;
  428                 }
  429         } else {
  430                 ip_rt_put(rt);
  431                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
  432                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
  433                         kfree_skb(skb2);
  434                         return;
  435                 }
  436         }
  437
  438         /* change mtu on this route */
  439         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
  440                 if (rel_info > dst_mtu(skb2->dst)) {
  441                         kfree_skb(skb2);
  442                         return;
  443                 }
  444                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
  445                 rel_info = htonl(rel_info);
  446         } else if (type == ICMP_TIME_EXCEEDED) {
  447                 struct ip_tunnel *t = netdev_priv(skb2->dev);
  448                 if (t->parms.iph.ttl) {
  449                         rel_type = ICMP_DEST_UNREACH;
  450                         rel_code = ICMP_HOST_UNREACH;
  451                 }
  452         }
  453
  454         icmp_send(skb2, rel_type, rel_code, rel_info);
  455         kfree_skb(skb2);
  456         return;
  457 #endif
  458 }
 459
 460 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
 461 {
 462         struct iphdr *inner_iph = skb->nh.iph;
 463
 464         if (INET_ECN_is_ce(outer_iph->tos))
 465                 IP_ECN_set_ce(inner_iph);
 466 }
 467
 468 static int ipip_rcv(struct sk_buff *skb)
 469 {
 470         struct iphdr *iph;
 471         struct ip_tunnel *tunnel;
 472
 473         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 474                 goto out;
 475
 476         iph = skb->nh.iph;
 477
 478         read_lock(&ipip_lock);
 479         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 480                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 481                         read_unlock(&ipip_lock);
 482                         kfree_skb(skb);
 483                         return 0;
 484                 }
 485
 486                 secpath_reset(skb);
 487
 488                 skb->mac.raw = skb->nh.raw;
 489                 skb->nh.raw = skb->data;
 490                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 491                 skb->protocol = htons(ETH_P_IP);
 492                 skb->pkt_type = PACKET_HOST;
 493
 494                 tunnel->stat.rx_packets++;
 495                 tunnel->stat.rx_bytes += skb->len;
 496                 skb->dev = tunnel->dev;
 497                 dst_release(skb->dst);
 498                 skb->dst = NULL;
 499                 nf_reset(skb);
 500                 ipip_ecn_decapsulate(iph, skb);
 501                 netif_rx(skb);
 502                 read_unlock(&ipip_lock);
 503                 return 0;
 504         }
 505         read_unlock(&ipip_lock);
 506
 507 out:
 508         return -1;
 509 }
 510
 511 /*
 512  *      This function assumes it is being called from dev_queue_xmit()
 513  *      and that skb is filled properly by that function.
 514  */
 515
  /*
   * Transmit handler of a tunnel device (assigned to dev->hard_start_xmit in
   * ipip_tunnel_setup()).  Routes towards the tunnel's remote endpoint,
   * enforces the path MTU, makes sure there is headroom for one more IP
   * header, prepends that header and hands the packet to IPTUNNEL_XMIT().
   *
   * @skb: packet to encapsulate; anything but ETH_P_IP is dropped.
   * @dev: the tunnel device itself.
   *
   * Always returns 0.  On every failure path the skb is freed and either
   * tx_errors or tx_dropped is incremented.  tunnel->recursion is raised on
   * entry and lowered on every exit.
   *
   * NOTE(review): IPTUNNEL_XMIT() is defined outside this file and takes no
   * arguments, so it presumably depends on the local variable names used
   * here - check its definition before renaming anything.
   */
  516 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
  517 {
  518         struct ip_tunnel *tunnel = netdev_priv(dev);
  519         struct net_device_stats *stats = &tunnel->stat;
  520         struct iphdr  *tiph = &tunnel->parms.iph;
  521         u8     tos = tunnel->parms.iph.tos;
  522         u16    df = tiph->frag_off;
  523         struct rtable *rt;                      /* Route to the other host */
  524         struct net_device *tdev;                        /* Device to other host */
  525         struct iphdr  *old_iph = skb->nh.iph;
  526         struct iphdr  *iph;                     /* Our new IP header */
  527         int    max_headroom;                    /* The extra header space needed */
  528         u32    dst = tiph->daddr;
  529         int    mtu;
  530
              /* Re-entry guard: a non-zero counter means this tunnel is
                 already in the middle of a transmit. */
  531         if (tunnel->recursion++) {
  532                 tunnel->stat.collisions++;
  533                 goto tx_error;
  534         }
  535
  536         if (skb->protocol != htons(ETH_P_IP))
  537                 goto tx_error;
  538
              /* Low bit of the configured TOS set: take the TOS from the
                 inner packet instead. */
  539         if (tos&1)
  540                 tos = old_iph->tos;
  541
  542         if (!dst) {
  543                 /* NBMA tunnel */
                      /* No configured remote: use the gateway of the route
                         already attached to the skb as the outer destination. */
  544                 if ((rt = (struct rtable*)skb->dst) == NULL) {
  545                         tunnel->stat.tx_fifo_errors++;
  546                         goto tx_error;
  547                 }
  548                 if ((dst = rt->rt_gateway) == 0)
  549                         goto tx_error_icmp;
  550         }
  551
              /* Route the outer packet; from here on `rt` holds a reference
                 that every error path releases with ip_rt_put(). */
  552         {
  553                 struct flowi fl = { .oif = tunnel->parms.link,
  554                                     .nl_u = { .ip4_u =
  555                                               { .daddr = dst,
  556                                                 .saddr = tiph->saddr,
  557                                                 .tos = RT_TOS(tos) } },
  558                                     .proto = IPPROTO_IPIP };
  559                 if (ip_route_output_key(&rt, &fl)) {
  560                         tunnel->stat.tx_carrier_errors++;
  561                         goto tx_error_icmp;
  562                 }
  563         }
  564         tdev = rt->u.dst.dev;
  565
              /* The route leads back into this very tunnel device. */
  566         if (tdev == dev) {
  567                 ip_rt_put(rt);
  568                 tunnel->stat.collisions++;
  569                 goto tx_error;
  570         }
  571
              /* With a configured frag_off the usable MTU is the outer
                 route's MTU minus the extra header; otherwise use the MTU of
                 the skb's own route, or the device MTU if it has none. */
  572         if (tiph->frag_off)
  573                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
  574         else
  575                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
  576
  577         if (mtu < 68) {
  578                 tunnel->stat.collisions++;
  579                 ip_rt_put(rt);
  580                 goto tx_error;
  581         }
  582         if (skb->dst)
  583                 skb->dst->ops->update_pmtu(skb->dst, mtu);
  584
              /* The outer header inherits DF from the inner packet. */
  585         df |= (old_iph->frag_off&htons(IP_DF));
  586
              /* Inner packet has DF set and does not fit: tell the sender. */
  587         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
  588                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
  589                 ip_rt_put(rt);
  590                 goto tx_error;
  591         }
  592
              /* Errors recorded by ipip_err() are consumed here one per
                 transmitted packet while they are recent, and discarded
                 once they are older than IPTUNNEL_ERR_TIMEO. */
  593         if (tunnel->err_count > 0) {
  594                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
  595                         tunnel->err_count--;
  596                         dst_link_failure(skb);
  597                 } else
  598                         tunnel->err_count = 0;
  599         }
  600
  601         /*
  602          * Okay, now see if we can stuff it in the buffer as-is.
  603          */
  604         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
  605
  606         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
  607                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
  608                 if (!new_skb) {
  609                         ip_rt_put(rt);
  610                         stats->tx_dropped++;
  611                         dev_kfree_skb(skb);
  612                         tunnel->recursion--;
  613                         return 0;
  614                 }
  615                 if (skb->sk)
  616                         skb_set_owner_w(new_skb, skb->sk);
  617                 dev_kfree_skb(skb);
  618                 skb = new_skb;
                      /* The inner header now lives in the new buffer. */
  619                 old_iph = skb->nh.iph;
  620         }
  621
              /* Inner header becomes the transport header; the pushed space
                 becomes the new network header. */
  622         skb->h.raw = skb->nh.raw;
  623         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
  624         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  625         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
  626                               IPSKB_REROUTED);
              /* The skb takes over the route reference obtained above. */
  627         dst_release(skb->dst);
  628         skb->dst = &rt->u.dst;
  629
  630         /*
  631          *      Push down and install the IPIP header.
  632          */
  633
  634         iph                     =       skb->nh.iph;
  635         iph->version            =       4;
  636         iph->ihl                =       sizeof(struct iphdr)>>2;
  637         iph->frag_off           =       df;
  638         iph->protocol           =       IPPROTO_IPIP;
  639         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
  640         iph->daddr              =       rt->rt_dst;
  641         iph->saddr              =       rt->rt_src;
  642
              /* A configured TTL of 0 means "copy the inner packet's TTL". */
  643         if ((iph->ttl = tiph->ttl) == 0)
  644                 iph->ttl        =       old_iph->ttl;
  645
  646         nf_reset(skb);
  647
  648         IPTUNNEL_XMIT();
  649         tunnel->recursion--;
  650         return 0;
  651
  652 tx_error_icmp:
  653         dst_link_failure(skb);
  654 tx_error:
  655         stats->tx_errors++;
  656         dev_kfree_skb(skb);
  657         tunnel->recursion--;
  658         return 0;
  659 }
 660
  /*
   * Device ioctl handler (assigned to dev->do_ioctl in ipip_tunnel_setup()).
   * Implements SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL;
   * the parameter block is a struct ip_tunnel_parm copied to/from
   * ifr->ifr_ifru.ifru_data.
   *
   * When issued on the fallback device, GET and DEL operate on the tunnel
   * identified by the addresses in the user-supplied parameters rather than
   * on the fallback device itself.
   *
   * Returns 0 on success, otherwise a negative errno: -EFAULT on a failed
   * user copy, -EPERM without CAP_NET_ADMIN or when asked to delete the
   * fallback tunnel, -EINVAL for bad parameters or an unknown command,
   * -EEXIST when the requested addresses belong to another device,
   * -ENOBUFS when a tunnel could not be created, -ENOENT when none was found.
   * For SIOCDELTUNNEL the result of unregister_netdevice() is returned.
   */
  661 static int
  662 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
  663 {
  664         int err = 0;
  665         struct ip_tunnel_parm p;
  666         struct ip_tunnel *t;
  667
  668         switch (cmd) {
  669         case SIOCGETTUNNEL:
  670                 t = NULL;
                      /* On the fallback device, try to find the tunnel the
                         caller described; otherwise report on dev itself. */
  671                 if (dev == ipip_fb_tunnel_dev) {
  672                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
  673                                 err = -EFAULT;
  674                                 break;
  675                         }
  676                         t = ipip_tunnel_locate(&p, 0);
  677                 }
  678                 if (t == NULL)
  679                         t = netdev_priv(dev);
  680                 memcpy(&p, &t->parms, sizeof(p));
  681                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
  682                         err = -EFAULT;
  683                 break;
  684
  685         case SIOCADDTUNNEL:
  686         case SIOCCHGTUNNEL:
                      /* err is pre-loaded before each check so a bare
                         "goto done" reports the right failure. */
  687                 err = -EPERM;
  688                 if (!capable(CAP_NET_ADMIN))
  689                         goto done;
  690
  691                 err = -EFAULT;
  692                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  693                         goto done;
  694
                      /* Only a plain 20-byte IPv4 IPIP header is accepted, and
                         DF is the only bit allowed in frag_off. */
  695                 err = -EINVAL;
  696                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
  697                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
  698                         goto done;
                      /* A fixed TTL forces DF on the outer header. */
  699                 if (p.iph.ttl)
  700                         p.iph.frag_off |= htons(IP_DF);
  701
                      /* Creates the tunnel only for SIOCADDTUNNEL. */
  702                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
  703
  704                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
  705                         if (t != NULL) {
                                      /* The new addresses already belong to a
                                         different tunnel device. */
  706                                 if (t->dev != dev) {
  707                                         err = -EEXIST;
  708                                         break;
  709                                 }
  710                         } else {
                                      /* A point-to-point device must keep a
                                         remote address and a non-point-to-point
                                         one must not gain one. */
  711                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
  712                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
  713                                         err = -EINVAL;
  714                                         break;
  715                                 }
                                      /* Re-hash this device's tunnel under its
                                         new endpoint addresses. */
  716                                 t = netdev_priv(dev);
  717                                 ipip_tunnel_unlink(t);
  718                                 t->parms.iph.saddr = p.iph.saddr;
  719                                 t->parms.iph.daddr = p.iph.daddr;
  720                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
  721                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
  722                                 ipip_tunnel_link(t);
  723                                 netdev_state_change(dev);
  724                         }
  725                 }
  726
  727                 if (t) {
  728                         err = 0;
  729                         if (cmd == SIOCCHGTUNNEL) {
  730                                 t->parms.iph.ttl = p.iph.ttl;
  731                                 t->parms.iph.tos = p.iph.tos;
  732                                 t->parms.iph.frag_off = p.iph.frag_off;
  733                         }
                              /* Report the resulting parameters to the caller. */
  734                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
  735                                 err = -EFAULT;
  736                 } else
  737                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
  738                 break;
  739
  740         case SIOCDELTUNNEL:
  741                 err = -EPERM;
  742                 if (!capable(CAP_NET_ADMIN))
  743                         goto done;
  744
                      /* On the fallback device the victim is looked up from the
                         user parameters; the fallback tunnel itself cannot be
                         deleted this way. */
  745                 if (dev == ipip_fb_tunnel_dev) {
  746                         err = -EFAULT;
  747                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  748                                 goto done;
  749                         err = -ENOENT;
  750                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
  751                                 goto done;
  752                         err = -EPERM;
  753                         if (t->dev == ipip_fb_tunnel_dev)
  754                                 goto done;
  755                         dev = t->dev;
  756                 }
  757                 err = unregister_netdevice(dev);
  758                 break;
  759
  760         default:
  761                 err = -EINVAL;
  762         }
  763
  764 done:
  765         return err;
  766 }
 767
 768 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 769 {
 770         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 771 }
 772
 773 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 774 {
 775         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 776                 return -EINVAL;
 777         dev->mtu = new_mtu;
 778         return 0;
 779 }
 780
 781 static void ipip_tunnel_setup(struct net_device *dev)
 782 {
 783         SET_MODULE_OWNER(dev);
 784         dev->uninit             = ipip_tunnel_uninit;
 785         dev->hard_start_xmit    = ipip_tunnel_xmit;
 786         dev->get_stats          = ipip_tunnel_get_stats;
 787         dev->do_ioctl           = ipip_tunnel_ioctl;
 788         dev->change_mtu         = ipip_tunnel_change_mtu;
 789         dev->destructor         = free_netdev;
 790
 791         dev->type               = ARPHRD_TUNNEL;
 792         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 793         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 794         dev->flags              = IFF_NOARP;
 795         dev->iflink             = 0;
 796         dev->addr_len           = 4;
 797 }
 798
 799 static int ipip_tunnel_init(struct net_device *dev)
 800 {
 801         struct net_device *tdev = NULL;
 802         struct ip_tunnel *tunnel;
 803         struct iphdr *iph;
 804
 805         tunnel = netdev_priv(dev);
 806         iph = &tunnel->parms.iph;
 807
 808         tunnel->dev = dev;
 809         strcpy(tunnel->parms.name, dev->name);
 810
 811         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 812         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 813
 814         if (iph->daddr) {
 815                 struct flowi fl = { .oif = tunnel->parms.link,
 816                                     .nl_u = { .ip4_u =
 817                                               { .daddr = iph->daddr,
 818                                                 .saddr = iph->saddr,
 819                                                 .tos = RT_TOS(iph->tos) } },
 820                                     .proto = IPPROTO_IPIP };
 821                 struct rtable *rt;
 822                 if (!ip_route_output_key(&rt, &fl)) {
 823                         tdev = rt->u.dst.dev;
 824                         ip_rt_put(rt);
 825                 }
 826                 dev->flags |= IFF_POINTOPOINT;
 827         }
 828
 829         if (!tdev && tunnel->parms.link)
 830                 tdev = __dev_get_by_index(tunnel->parms.link);
 831
 832         if (tdev) {
 833                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 834                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 835         }
 836         dev->iflink = tunnel->parms.link;
 837
 838         return 0;
 839 }
 840
 841 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 842 {
 843         struct ip_tunnel *tunnel = netdev_priv(dev);
 844         struct iphdr *iph = &tunnel->parms.iph;
 845
 846         tunnel->dev = dev;
 847         strcpy(tunnel->parms.name, dev->name);
 848
 849         iph->version            = 4;
 850         iph->protocol           = IPPROTO_IPIP;
 851         iph->ihl                = 5;
 852
 853         dev_hold(dev);
 854         tunnels_wc[0]           = tunnel;
 855         return 0;
 856 }
 857
 858 #ifdef CONFIG_INET_TUNNEL
 859 static struct xfrm_tunnel ipip_handler = {
 860         .handler        =       ipip_rcv,
 861         .err_handler    =       ipip_err,
 862 };
 863
 864 static inline int ipip_register(void)
 865 {
 866         return xfrm4_tunnel_register(&ipip_handler);
 867 }
 868
 869 static inline int ipip_unregister(void)
 870 {
 871         return xfrm4_tunnel_deregister(&ipip_handler);
 872 }
 873 #else
 874 static struct net_protocol ipip_protocol = {
 875         .handler        =       ipip_rcv,
 876         .err_handler    =       ipip_err,
 877         .no_policy      =       1,
 878 };
 879
 880 static inline int ipip_register(void)
 881 {
 882         return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
 883 }
 884
 885 static inline int ipip_unregister(void)
 886 {
 887         return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
 888 }
 889 #endif
 890
 891 static char banner[] __initdata =
 892         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 893
 894 static int __init ipip_init(void)
 895 {
 896         int err;
 897
 898         printk(banner);
 899
 900         if (ipip_register() < 0) {
 901                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 902                 return -EAGAIN;
 903         }
 904
 905         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 906                                            "tunl0",
 907                                            ipip_tunnel_setup);
 908         if (!ipip_fb_tunnel_dev) {
 909                 err = -ENOMEM;
 910                 goto err1;
 911         }
 912
 913         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 914
 915         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 916                 goto err2;
 917  out:
 918         return err;
 919  err2:
 920         free_netdev(ipip_fb_tunnel_dev);
 921  err1:
 922         ipip_unregister();
 923         goto out;
 924 }
 925
 926 static void __exit ipip_destroy_tunnels(void)
 927 {
 928         int prio;
 929
 930         for (prio = 1; prio < 4; prio++) {
 931                 int h;
 932                 for (h = 0; h < HASH_SIZE; h++) {
 933                         struct ip_tunnel *t;
 934                         while ((t = tunnels[prio][h]) != NULL)
 935                                 unregister_netdevice(t->dev);
 936                 }
 937         }
 938 }
 939
 940 static void __exit ipip_fini(void)
 941 {
 942         if (ipip_unregister() < 0)
 943                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 944
 945         rtnl_lock();
 946         ipip_destroy_tunnels();
 947         unregister_netdevice(ipip_fb_tunnel_dev);
 948         rtnl_unlock();
 949 }
 950
 951 module_init(ipip_init);
 952 module_exit(ipip_fini);
 953 MODULE_LICENSE("GPL");