net/ipv4/ipmr.c

   1 /*
   2  *      IP multicast routing support for mrouted 3.6/3.8
   3  *
   4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *        Linux Consultancy and Custom Driver Development
   6  *
   7  *      This program is free software; you can redistribute it and/or
   8  *      modify it under the terms of the GNU General Public License
   9  *      as published by the Free Software Foundation; either version
  10  *      2 of the License, or (at your option) any later version.
  11  *
  12  *      Fixes:
  13  *      Michael Chastain        :       Incorrect size of copying.
  14  *      Alan Cox                :       Added the cache manager code
  15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
  16  *      Mike McLagan            :       Routing by source
  17  *      Malcolm Beattie         :       Buffer handling fixes.
  18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  19  *      SVR Anand               :       Fixed several multicast bugs and problems.
  20  *      Alexey Kuznetsov        :       Status, optimisations and more.
  21  *      Brad Parker             :       Better behaviour on mrouted upcall
  22  *                                      overflow.
  23  *      Carlos Picoto           :       PIMv1 Support
  24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
   25  *                                      Relax this requirement to work with older peers.
  26  *
  27  */
  28
  29 #include <asm/system.h>
  30 #include <asm/uaccess.h>
  31 #include <linux/types.h>
  32 #include <linux/capability.h>
  33 #include <linux/errno.h>
  34 #include <linux/timer.h>
  35 #include <linux/mm.h>
  36 #include <linux/kernel.h>
  37 #include <linux/fcntl.h>
  38 #include <linux/stat.h>
  39 #include <linux/socket.h>
  40 #include <linux/in.h>
  41 #include <linux/inet.h>
  42 #include <linux/netdevice.h>
  43 #include <linux/inetdevice.h>
  44 #include <linux/igmp.h>
  45 #include <linux/proc_fs.h>
  46 #include <linux/seq_file.h>
  47 #include <linux/mroute.h>
  48 #include <linux/init.h>
  49 #include <linux/if_ether.h>
  50 #include <net/net_namespace.h>
  51 #include <net/ip.h>
  52 #include <net/protocol.h>
  53 #include <linux/skbuff.h>
  54 #include <net/route.h>
  55 #include <net/sock.h>
  56 #include <net/icmp.h>
  57 #include <net/udp.h>
  58 #include <net/raw.h>
  59 #include <linux/notifier.h>
  60 #include <linux/if_arp.h>
  61 #include <linux/netfilter_ipv4.h>
  62 #include <net/ipip.h>
  63 #include <net/checksum.h>
  64 #include <net/netlink.h>
  65
  66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  67 #define CONFIG_IP_PIMSM 1
  68 #endif
  69
  70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
  71    Note that the changes are semaphored via rtnl_lock.
  72  */
  73
  74 static DEFINE_RWLOCK(mrt_lock);
  75
  76 /*
  77  *      Multicast router control variables
  78  */
  79
  80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
  81
  82 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
  83
  84 /* Special spinlock for queue of unresolved entries */
  85 static DEFINE_SPINLOCK(mfc_unres_lock);
  86
  87 /* We return to original Alan's scheme. Hash table of resolved
  88    entries is changed only in process context and protected
  89    with weak lock mrt_lock. Queue of unresolved entries is protected
  90    with strong spinlock mfc_unres_lock.
  91
  92    In this case data path is free of exclusive locks at all.
  93  */
  94
  95 static struct kmem_cache *mrt_cachep __read_mostly;
  96
  97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
  98 static int ipmr_cache_report(struct net *net,
  99                              struct sk_buff *pkt, vifi_t vifi, int assert);
 100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 101
 102 static struct timer_list ipmr_expire_timer;
 103
 104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 105
 106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 107 {
 108         struct net *net = dev_net(dev);
 109
 110         dev_close(dev);
 111
 112         dev = __dev_get_by_name(net, "tunl0");
 113         if (dev) {
 114                 const struct net_device_ops *ops = dev->netdev_ops;
 115                 struct ifreq ifr;
 116                 struct ip_tunnel_parm p;
 117
 118                 memset(&p, 0, sizeof(p));
 119                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 120                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 121                 p.iph.version = 4;
 122                 p.iph.ihl = 5;
 123                 p.iph.protocol = IPPROTO_IPIP;
 124                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 125                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 126
 127                 if (ops->ndo_do_ioctl) {
 128                         mm_segment_t oldfs = get_fs();
 129
 130                         set_fs(KERNEL_DS);
 131                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
 132                         set_fs(oldfs);
 133                 }
 134         }
 135 }
 136
 137 static
 138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 139 {
 140         struct net_device  *dev;
 141
 142         dev = __dev_get_by_name(net, "tunl0");
 143
 144         if (dev) {
 145                 const struct net_device_ops *ops = dev->netdev_ops;
 146                 int err;
 147                 struct ifreq ifr;
 148                 struct ip_tunnel_parm p;
 149                 struct in_device  *in_dev;
 150
 151                 memset(&p, 0, sizeof(p));
 152                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 153                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 154                 p.iph.version = 4;
 155                 p.iph.ihl = 5;
 156                 p.iph.protocol = IPPROTO_IPIP;
 157                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 158                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 159
 160                 if (ops->ndo_do_ioctl) {
 161                         mm_segment_t oldfs = get_fs();
 162
 163                         set_fs(KERNEL_DS);
 164                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 165                         set_fs(oldfs);
 166                 } else
 167                         err = -EOPNOTSUPP;
 168
 169                 dev = NULL;
 170
 171                 if (err == 0 &&
 172                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
 173                         dev->flags |= IFF_MULTICAST;
 174
 175                         in_dev = __in_dev_get_rtnl(dev);
 176                         if (in_dev == NULL)
 177                                 goto failure;
 178
 179                         ipv4_devconf_setall(in_dev);
 180                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 181
 182                         if (dev_open(dev))
 183                                 goto failure;
 184                         dev_hold(dev);
 185                 }
 186         }
 187         return dev;
 188
 189 failure:
 190         /* allow the register to be completed before unregistering. */
 191         rtnl_unlock();
 192         rtnl_lock();
 193
 194         unregister_netdevice(dev);
 195         return NULL;
 196 }
 197
 198 #ifdef CONFIG_IP_PIMSM
 199
 200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 201 {
 202         struct net *net = dev_net(dev);
 203
 204         read_lock(&mrt_lock);
 205         dev->stats.tx_bytes += skb->len;
 206         dev->stats.tx_packets++;
 207         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
 208                           IGMPMSG_WHOLEPKT);
 209         read_unlock(&mrt_lock);
 210         kfree_skb(skb);
 211         return NETDEV_TX_OK;
 212 }
 213
 214 static const struct net_device_ops reg_vif_netdev_ops = {
 215         .ndo_start_xmit = reg_vif_xmit,
 216 };
 217
 218 static void reg_vif_setup(struct net_device *dev)
 219 {
 220         dev->type               = ARPHRD_PIMREG;
 221         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 222         dev->flags              = IFF_NOARP;
 223         dev->netdev_ops         = &reg_vif_netdev_ops,
 224         dev->destructor         = free_netdev;
 225         dev->features           |= NETIF_F_NETNS_LOCAL;
 226 }
 227
 228 static struct net_device *ipmr_reg_vif(struct net *net)
 229 {
 230         struct net_device *dev;
 231         struct in_device *in_dev;
 232
 233         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
 234
 235         if (dev == NULL)
 236                 return NULL;
 237
 238         dev_net_set(dev, net);
 239
 240         if (register_netdevice(dev)) {
 241                 free_netdev(dev);
 242                 return NULL;
 243         }
 244         dev->iflink = 0;
 245
 246         rcu_read_lock();
 247         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
 248                 rcu_read_unlock();
 249                 goto failure;
 250         }
 251
 252         ipv4_devconf_setall(in_dev);
 253         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 254         rcu_read_unlock();
 255
 256         if (dev_open(dev))
 257                 goto failure;
 258
 259         dev_hold(dev);
 260
 261         return dev;
 262
 263 failure:
 264         /* allow the register to be completed before unregistering. */
 265         rtnl_unlock();
 266         rtnl_lock();
 267
 268         unregister_netdevice(dev);
 269         return NULL;
 270 }
 271 #endif
 272
 273 /*
 274  *      Delete a VIF entry
 275  *      @notify: Set to 1, if the caller is a notifier_call
 276  */
 277
 278 static int vif_delete(struct net *net, int vifi, int notify)
 279 {
 280         struct vif_device *v;
 281         struct net_device *dev;
 282         struct in_device *in_dev;
 283
 284         if (vifi < 0 || vifi >= net->ipv4.maxvif)
 285                 return -EADDRNOTAVAIL;
 286
 287         v = &net->ipv4.vif_table[vifi];
 288
 289         write_lock_bh(&mrt_lock);
 290         dev = v->dev;
 291         v->dev = NULL;
 292
 293         if (!dev) {
 294                 write_unlock_bh(&mrt_lock);
 295                 return -EADDRNOTAVAIL;
 296         }
 297
 298 #ifdef CONFIG_IP_PIMSM
 299         if (vifi == net->ipv4.mroute_reg_vif_num)
 300                 net->ipv4.mroute_reg_vif_num = -1;
 301 #endif
 302
 303         if (vifi+1 == net->ipv4.maxvif) {
 304                 int tmp;
 305                 for (tmp=vifi-1; tmp>=0; tmp--) {
 306                         if (VIF_EXISTS(net, tmp))
 307                                 break;
 308                 }
 309                 net->ipv4.maxvif = tmp+1;
 310         }
 311
 312         write_unlock_bh(&mrt_lock);
 313
 314         dev_set_allmulti(dev, -1);
 315
 316         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
 317                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
 318                 ip_rt_multicast_event(in_dev);
 319         }
 320
 321         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
 322                 unregister_netdevice(dev);
 323
 324         dev_put(dev);
 325         return 0;
 326 }
 327
 328 static inline void ipmr_cache_free(struct mfc_cache *c)
 329 {
 330         release_net(mfc_net(c));
 331         kmem_cache_free(mrt_cachep, c);
 332 }
 333
 334 /* Destroy an unresolved cache entry, killing queued skbs
 335    and reporting error to netlink readers.
 336  */
 337
 338 static void ipmr_destroy_unres(struct mfc_cache *c)
 339 {
 340         struct sk_buff *skb;
 341         struct nlmsgerr *e;
 342         struct net *net = mfc_net(c);
 343
 344         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 345
 346         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 347                 if (ip_hdr(skb)->version == 0) {
 348                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 349                         nlh->nlmsg_type = NLMSG_ERROR;
 350                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 351                         skb_trim(skb, nlh->nlmsg_len);
 352                         e = NLMSG_DATA(nlh);
 353                         e->error = -ETIMEDOUT;
 354                         memset(&e->msg, 0, sizeof(e->msg));
 355
 356                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 357                 } else
 358                         kfree_skb(skb);
 359         }
 360
 361         ipmr_cache_free(c);
 362 }
 363
 364
 365 /* Single timer process for all the unresolved queue. */
 366
 367 static void ipmr_expire_process(unsigned long dummy)
 368 {
 369         unsigned long now;
 370         unsigned long expires;
 371         struct mfc_cache *c, **cp;
 372
 373         if (!spin_trylock(&mfc_unres_lock)) {
 374                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 375                 return;
 376         }
 377
 378         if (mfc_unres_queue == NULL)
 379                 goto out;
 380
 381         now = jiffies;
 382         expires = 10*HZ;
 383         cp = &mfc_unres_queue;
 384
 385         while ((c=*cp) != NULL) {
 386                 if (time_after(c->mfc_un.unres.expires, now)) {
 387                         unsigned long interval = c->mfc_un.unres.expires - now;
 388                         if (interval < expires)
 389                                 expires = interval;
 390                         cp = &c->next;
 391                         continue;
 392                 }
 393
 394                 *cp = c->next;
 395
 396                 ipmr_destroy_unres(c);
 397         }
 398
 399         if (mfc_unres_queue != NULL)
 400                 mod_timer(&ipmr_expire_timer, jiffies + expires);
 401
 402 out:
 403         spin_unlock(&mfc_unres_lock);
 404 }
 405
 406 /* Fill oifs list. It is called under write locked mrt_lock. */
 407
 408 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 409 {
 410         int vifi;
 411         struct net *net = mfc_net(cache);
 412
 413         cache->mfc_un.res.minvif = MAXVIFS;
 414         cache->mfc_un.res.maxvif = 0;
 415         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 416
 417         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
 418                 if (VIF_EXISTS(net, vifi) &&
 419                     ttls[vifi] && ttls[vifi] < 255) {
 420                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 421                         if (cache->mfc_un.res.minvif > vifi)
 422                                 cache->mfc_un.res.minvif = vifi;
 423                         if (cache->mfc_un.res.maxvif <= vifi)
 424                                 cache->mfc_un.res.maxvif = vifi + 1;
 425                 }
 426         }
 427 }
 428
 429 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 430 {
 431         int vifi = vifc->vifc_vifi;
 432         struct vif_device *v = &net->ipv4.vif_table[vifi];
 433         struct net_device *dev;
 434         struct in_device *in_dev;
 435         int err;
 436
 437         /* Is vif busy ? */
 438         if (VIF_EXISTS(net, vifi))
 439                 return -EADDRINUSE;
 440
 441         switch (vifc->vifc_flags) {
 442 #ifdef CONFIG_IP_PIMSM
 443         case VIFF_REGISTER:
 444                 /*
 445                  * Special Purpose VIF in PIM
 446                  * All the packets will be sent to the daemon
 447                  */
 448                 if (net->ipv4.mroute_reg_vif_num >= 0)
 449                         return -EADDRINUSE;
 450                 dev = ipmr_reg_vif(net);
 451                 if (!dev)
 452                         return -ENOBUFS;
 453                 err = dev_set_allmulti(dev, 1);
 454                 if (err) {
 455                         unregister_netdevice(dev);
 456                         dev_put(dev);
 457                         return err;
 458                 }
 459                 break;
 460 #endif
 461         case VIFF_TUNNEL:
 462                 dev = ipmr_new_tunnel(net, vifc);
 463                 if (!dev)
 464                         return -ENOBUFS;
 465                 err = dev_set_allmulti(dev, 1);
 466                 if (err) {
 467                         ipmr_del_tunnel(dev, vifc);
 468                         dev_put(dev);
 469                         return err;
 470                 }
 471                 break;
 472
 473         case VIFF_USE_IFINDEX:
 474         case 0:
 475                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
 476                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
 477                         if (dev && dev->ip_ptr == NULL) {
 478                                 dev_put(dev);
 479                                 return -EADDRNOTAVAIL;
 480                         }
 481                 } else
 482                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
 483
 484                 if (!dev)
 485                         return -EADDRNOTAVAIL;
 486                 err = dev_set_allmulti(dev, 1);
 487                 if (err) {
 488                         dev_put(dev);
 489                         return err;
 490                 }
 491                 break;
 492         default:
 493                 return -EINVAL;
 494         }
 495
 496         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 497                 return -EADDRNOTAVAIL;
 498         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
 499         ip_rt_multicast_event(in_dev);
 500
 501         /*
 502          *      Fill in the VIF structures
 503          */
 504         v->rate_limit = vifc->vifc_rate_limit;
 505         v->local = vifc->vifc_lcl_addr.s_addr;
 506         v->remote = vifc->vifc_rmt_addr.s_addr;
 507         v->flags = vifc->vifc_flags;
 508         if (!mrtsock)
 509                 v->flags |= VIFF_STATIC;
 510         v->threshold = vifc->vifc_threshold;
 511         v->bytes_in = 0;
 512         v->bytes_out = 0;
 513         v->pkt_in = 0;
 514         v->pkt_out = 0;
 515         v->link = dev->ifindex;
 516         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 517                 v->link = dev->iflink;
 518
 519         /* And finish update writing critical data */
 520         write_lock_bh(&mrt_lock);
 521         v->dev = dev;
 522 #ifdef CONFIG_IP_PIMSM
 523         if (v->flags&VIFF_REGISTER)
 524                 net->ipv4.mroute_reg_vif_num = vifi;
 525 #endif
 526         if (vifi+1 > net->ipv4.maxvif)
 527                 net->ipv4.maxvif = vifi+1;
 528         write_unlock_bh(&mrt_lock);
 529         return 0;
 530 }
 531
 532 static struct mfc_cache *ipmr_cache_find(struct net *net,
 533                                          __be32 origin,
 534                                          __be32 mcastgrp)
 535 {
 536         int line = MFC_HASH(mcastgrp, origin);
 537         struct mfc_cache *c;
 538
 539         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
 540                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 541                         break;
 542         }
 543         return c;
 544 }
 545
 546 /*
 547  *      Allocate a multicast cache entry
 548  */
 549 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
 550 {
 551         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 552         if (c == NULL)
 553                 return NULL;
 554         c->mfc_un.res.minvif = MAXVIFS;
 555         mfc_net_set(c, net);
 556         return c;
 557 }
 558
 559 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
 560 {
 561         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 562         if (c == NULL)
 563                 return NULL;
 564         skb_queue_head_init(&c->mfc_un.unres.unresolved);
 565         c->mfc_un.unres.expires = jiffies + 10*HZ;
 566         mfc_net_set(c, net);
 567         return c;
 568 }
 569
 570 /*
 571  *      A cache entry has gone into a resolved state from queued
 572  */
 573
 574 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 575 {
 576         struct sk_buff *skb;
 577         struct nlmsgerr *e;
 578
 579         /*
 580          *      Play the pending entries through our router
 581          */
 582
 583         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 584                 if (ip_hdr(skb)->version == 0) {
 585                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 586
 587                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 588                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
 589                                                   (u8 *)nlh);
 590                         } else {
 591                                 nlh->nlmsg_type = NLMSG_ERROR;
 592                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 593                                 skb_trim(skb, nlh->nlmsg_len);
 594                                 e = NLMSG_DATA(nlh);
 595                                 e->error = -EMSGSIZE;
 596                                 memset(&e->msg, 0, sizeof(e->msg));
 597                         }
 598
 599                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
 600                 } else
 601                         ip_mr_forward(skb, c, 0);
 602         }
 603 }
 604
 605 /*
 606  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 607  *      expects the following bizarre scheme.
 608  *
 609  *      Called under mrt_lock.
 610  */
 611
 612 static int ipmr_cache_report(struct net *net,
 613                              struct sk_buff *pkt, vifi_t vifi, int assert)
 614 {
 615         struct sk_buff *skb;
 616         const int ihl = ip_hdrlen(pkt);
 617         struct igmphdr *igmp;
 618         struct igmpmsg *msg;
 619         int ret;
 620
 621 #ifdef CONFIG_IP_PIMSM
 622         if (assert == IGMPMSG_WHOLEPKT)
 623                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 624         else
 625 #endif
 626                 skb = alloc_skb(128, GFP_ATOMIC);
 627
 628         if (!skb)
 629                 return -ENOBUFS;
 630
 631 #ifdef CONFIG_IP_PIMSM
 632         if (assert == IGMPMSG_WHOLEPKT) {
 633                 /* Ugly, but we have no choice with this interface.
 634                    Duplicate old header, fix ihl, length etc.
 635                    And all this only to mangle msg->im_msgtype and
 636                    to set msg->im_mbz to "mbz" :-)
 637                  */
 638                 skb_push(skb, sizeof(struct iphdr));
 639                 skb_reset_network_header(skb);
 640                 skb_reset_transport_header(skb);
 641                 msg = (struct igmpmsg *)skb_network_header(skb);
 642                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 643                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
 644                 msg->im_mbz = 0;
 645                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
 646                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 647                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 648                                              sizeof(struct iphdr));
 649         } else
 650 #endif
 651         {
 652
 653         /*
 654          *      Copy the IP header
 655          */
 656
 657         skb->network_header = skb->tail;
 658         skb_put(skb, ihl);
 659         skb_copy_to_linear_data(skb, pkt->data, ihl);
 660         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
 661         msg = (struct igmpmsg *)skb_network_header(skb);
 662         msg->im_vif = vifi;
 663         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 664
 665         /*
 666          *      Add our header
 667          */
 668
 669         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
 670         igmp->type      =
 671         msg->im_msgtype = assert;
 672         igmp->code      =       0;
 673         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
 674         skb->transport_header = skb->network_header;
 675         }
 676
 677         if (net->ipv4.mroute_sk == NULL) {
 678                 kfree_skb(skb);
 679                 return -EINVAL;
 680         }
 681
 682         /*
 683          *      Deliver to mrouted
 684          */
 685         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
 686         if (ret < 0) {
 687                 if (net_ratelimit())
 688                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 689                 kfree_skb(skb);
 690         }
 691
 692         return ret;
 693 }
 694
 695 /*
 696  *      Queue a packet for resolution. It gets locked cache entry!
 697  */
 698
 699 static int
 700 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 701 {
 702         int err;
 703         struct mfc_cache *c;
 704         const struct iphdr *iph = ip_hdr(skb);
 705
 706         spin_lock_bh(&mfc_unres_lock);
 707         for (c=mfc_unres_queue; c; c=c->next) {
 708                 if (net_eq(mfc_net(c), net) &&
 709                     c->mfc_mcastgrp == iph->daddr &&
 710                     c->mfc_origin == iph->saddr)
 711                         break;
 712         }
 713
 714         if (c == NULL) {
 715                 /*
 716                  *      Create a new entry if allowable
 717                  */
 718
 719                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
 720                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
 721                         spin_unlock_bh(&mfc_unres_lock);
 722
 723                         kfree_skb(skb);
 724                         return -ENOBUFS;
 725                 }
 726
 727                 /*
 728                  *      Fill in the new cache entry
 729                  */
 730                 c->mfc_parent   = -1;
 731                 c->mfc_origin   = iph->saddr;
 732                 c->mfc_mcastgrp = iph->daddr;
 733
 734                 /*
 735                  *      Reflect first query at mrouted.
 736                  */
 737                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
 738                 if (err < 0) {
 739                         /* If the report failed throw the cache entry
 740                            out - Brad Parker
 741                          */
 742                         spin_unlock_bh(&mfc_unres_lock);
 743
 744                         ipmr_cache_free(c);
 745                         kfree_skb(skb);
 746                         return err;
 747                 }
 748
 749                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
 750                 c->next = mfc_unres_queue;
 751                 mfc_unres_queue = c;
 752
 753                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 754         }
 755
 756         /*
 757          *      See if we can append the packet
 758          */
 759         if (c->mfc_un.unres.unresolved.qlen>3) {
 760                 kfree_skb(skb);
 761                 err = -ENOBUFS;
 762         } else {
 763                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 764                 err = 0;
 765         }
 766
 767         spin_unlock_bh(&mfc_unres_lock);
 768         return err;
 769 }
 770
 771 /*
 772  *      MFC cache manipulation by user space mroute daemon
 773  */
 774
 775 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 776 {
 777         int line;
 778         struct mfc_cache *c, **cp;
 779
 780         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 781
 782         for (cp = &net->ipv4.mfc_cache_array[line];
 783              (c = *cp) != NULL; cp = &c->next) {
 784                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 785                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 786                         write_lock_bh(&mrt_lock);
 787                         *cp = c->next;
 788                         write_unlock_bh(&mrt_lock);
 789
 790                         ipmr_cache_free(c);
 791                         return 0;
 792                 }
 793         }
 794         return -ENOENT;
 795 }
 796
 797 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 798 {
 799         int line;
 800         struct mfc_cache *uc, *c, **cp;
 801
 802         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 803
 804         for (cp = &net->ipv4.mfc_cache_array[line];
 805              (c = *cp) != NULL; cp = &c->next) {
 806                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 807                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 808                         break;
 809         }
 810
 811         if (c != NULL) {
 812                 write_lock_bh(&mrt_lock);
 813                 c->mfc_parent = mfc->mfcc_parent;
 814                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
 815                 if (!mrtsock)
 816                         c->mfc_flags |= MFC_STATIC;
 817                 write_unlock_bh(&mrt_lock);
 818                 return 0;
 819         }
 820
 821         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 822                 return -EINVAL;
 823
 824         c = ipmr_cache_alloc(net);
 825         if (c == NULL)
 826                 return -ENOMEM;
 827
 828         c->mfc_origin = mfc->mfcc_origin.s_addr;
 829         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 830         c->mfc_parent = mfc->mfcc_parent;
 831         ipmr_update_thresholds(c, mfc->mfcc_ttls);
 832         if (!mrtsock)
 833                 c->mfc_flags |= MFC_STATIC;
 834
 835         write_lock_bh(&mrt_lock);
 836         c->next = net->ipv4.mfc_cache_array[line];
 837         net->ipv4.mfc_cache_array[line] = c;
 838         write_unlock_bh(&mrt_lock);
 839
 840         /*
 841          *      Check to see if we resolved a queued list. If so we
 842          *      need to send on the frames and tidy up.
 843          */
 844         spin_lock_bh(&mfc_unres_lock);
 845         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 846              cp = &uc->next) {
 847                 if (net_eq(mfc_net(uc), net) &&
 848                     uc->mfc_origin == c->mfc_origin &&
 849                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 850                         *cp = uc->next;
 851                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 852                         break;
 853                 }
 854         }
 855         if (mfc_unres_queue == NULL)
 856                 del_timer(&ipmr_expire_timer);
 857         spin_unlock_bh(&mfc_unres_lock);
 858
 859         if (uc) {
 860                 ipmr_cache_resolve(uc, c);
 861                 ipmr_cache_free(uc);
 862         }
 863         return 0;
 864 }
 865
 866 /*
 867  *      Close the multicast socket, and clear the vif tables etc
 868  */
 869
 870 static void mroute_clean_tables(struct net *net)
 871 {
 872         int i;
 873
 874         /*
 875          *      Shut down all active vif entries
 876          */
 877         for (i = 0; i < net->ipv4.maxvif; i++) {
 878                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
 879                         vif_delete(net, i, 0);
 880         }
 881
 882         /*
 883          *      Wipe the cache
 884          */
 885         for (i=0; i<MFC_LINES; i++) {
 886                 struct mfc_cache *c, **cp;
 887
 888                 cp = &net->ipv4.mfc_cache_array[i];
 889                 while ((c = *cp) != NULL) {
 890                         if (c->mfc_flags&MFC_STATIC) {
 891                                 cp = &c->next;
 892                                 continue;
 893                         }
 894                         write_lock_bh(&mrt_lock);
 895                         *cp = c->next;
 896                         write_unlock_bh(&mrt_lock);
 897
 898                         ipmr_cache_free(c);
 899                 }
 900         }
 901
 902         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
 903                 struct mfc_cache *c, **cp;
 904
 905                 spin_lock_bh(&mfc_unres_lock);
 906                 cp = &mfc_unres_queue;
 907                 while ((c = *cp) != NULL) {
 908                         if (!net_eq(mfc_net(c), net)) {
 909                                 cp = &c->next;
 910                                 continue;
 911                         }
 912                         *cp = c->next;
 913
 914                         ipmr_destroy_unres(c);
 915                 }
 916                 spin_unlock_bh(&mfc_unres_lock);
 917         }
 918 }
 919
 920 static void mrtsock_destruct(struct sock *sk)
 921 {
 922         struct net *net = sock_net(sk);
 923
 924         rtnl_lock();
 925         if (sk == net->ipv4.mroute_sk) {
 926                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 927
 928                 write_lock_bh(&mrt_lock);
 929                 net->ipv4.mroute_sk = NULL;
 930                 write_unlock_bh(&mrt_lock);
 931
 932                 mroute_clean_tables(net);
 933         }
 934         rtnl_unlock();
 935 }
 936
 937 /*
 938  *      Socket options and virtual interface manipulation. The whole
 939  *      virtual interface system is a complete heap, but unfortunately
 940  *      that's how BSD mrouted happens to think. Maybe one day with a proper
 941  *      MOSPF/PIM router set up we can clean this up.
 942  */
 943
 944 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
 945 {
 946         int ret;
 947         struct vifctl vif;
 948         struct mfcctl mfc;
 949         struct net *net = sock_net(sk);
 950
 951         if (optname != MRT_INIT) {
 952                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
 953                         return -EACCES;
 954         }
 955
 956         switch (optname) {
 957         case MRT_INIT:
 958                 if (sk->sk_type != SOCK_RAW ||
 959                     inet_sk(sk)->num != IPPROTO_IGMP)
 960                         return -EOPNOTSUPP;
 961                 if (optlen != sizeof(int))
 962                         return -ENOPROTOOPT;
 963
 964                 rtnl_lock();
 965                 if (net->ipv4.mroute_sk) {
 966                         rtnl_unlock();
 967                         return -EADDRINUSE;
 968                 }
 969
 970                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
 971                 if (ret == 0) {
 972                         write_lock_bh(&mrt_lock);
 973                         net->ipv4.mroute_sk = sk;
 974                         write_unlock_bh(&mrt_lock);
 975
 976                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
 977                 }
 978                 rtnl_unlock();
 979                 return ret;
 980         case MRT_DONE:
 981                 if (sk != net->ipv4.mroute_sk)
 982                         return -EACCES;
 983                 return ip_ra_control(sk, 0, NULL);
 984         case MRT_ADD_VIF:
 985         case MRT_DEL_VIF:
 986                 if (optlen != sizeof(vif))
 987                         return -EINVAL;
 988                 if (copy_from_user(&vif, optval, sizeof(vif)))
 989                         return -EFAULT;
 990                 if (vif.vifc_vifi >= MAXVIFS)
 991                         return -ENFILE;
 992                 rtnl_lock();
 993                 if (optname == MRT_ADD_VIF) {
 994                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
 995                 } else {
 996                         ret = vif_delete(net, vif.vifc_vifi, 0);
 997                 }
 998                 rtnl_unlock();
 999                 return ret;
1000
1001                 /*
1002                  *      Manipulate the forwarding caches. These live
1003                  *      in a sort of kernel/user symbiosis.
1004                  */
1005         case MRT_ADD_MFC:
1006         case MRT_DEL_MFC:
1007                 if (optlen != sizeof(mfc))
1008                         return -EINVAL;
1009                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1010                         return -EFAULT;
1011                 rtnl_lock();
1012                 if (optname == MRT_DEL_MFC)
1013                         ret = ipmr_mfc_delete(net, &mfc);
1014                 else
1015                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1016                 rtnl_unlock();
1017                 return ret;
1018                 /*
1019                  *      Control PIM assert.
1020                  */
1021         case MRT_ASSERT:
1022         {
1023                 int v;
1024                 if (get_user(v,(int __user *)optval))
1025                         return -EFAULT;
1026                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1027                 return 0;
1028         }
1029 #ifdef CONFIG_IP_PIMSM
1030         case MRT_PIM:
1031         {
1032                 int v;
1033
1034                 if (get_user(v,(int __user *)optval))
1035                         return -EFAULT;
1036                 v = (v) ? 1 : 0;
1037
1038                 rtnl_lock();
1039                 ret = 0;
1040                 if (v != net->ipv4.mroute_do_pim) {
1041                         net->ipv4.mroute_do_pim = v;
1042                         net->ipv4.mroute_do_assert = v;
1043                 }
1044                 rtnl_unlock();
1045                 return ret;
1046         }
1047 #endif
1048         /*
1049          *      Spurious command, or MRT_VERSION which you cannot
1050          *      set.
1051          */
1052         default:
1053                 return -ENOPROTOOPT;
1054         }
1055 }
1056
1057 /*
1058  *      Getsock opt support for the multicast routing system.
1059  */
1060
1061 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1062 {
1063         int olr;
1064         int val;
1065         struct net *net = sock_net(sk);
1066
1067         if (optname != MRT_VERSION &&
1068 #ifdef CONFIG_IP_PIMSM
1069            optname!=MRT_PIM &&
1070 #endif
1071            optname!=MRT_ASSERT)
1072                 return -ENOPROTOOPT;
1073
1074         if (get_user(olr, optlen))
1075                 return -EFAULT;
1076
1077         olr = min_t(unsigned int, olr, sizeof(int));
1078         if (olr < 0)
1079                 return -EINVAL;
1080
1081         if (put_user(olr, optlen))
1082                 return -EFAULT;
1083         if (optname == MRT_VERSION)
1084                 val = 0x0305;
1085 #ifdef CONFIG_IP_PIMSM
1086         else if (optname == MRT_PIM)
1087                 val = net->ipv4.mroute_do_pim;
1088 #endif
1089         else
1090                 val = net->ipv4.mroute_do_assert;
1091         if (copy_to_user(optval, &val, olr))
1092                 return -EFAULT;
1093         return 0;
1094 }
1095
1096 /*
1097  *      The IP multicast ioctl support routines.
1098  */
1099
1100 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1101 {
1102         struct sioc_sg_req sr;
1103         struct sioc_vif_req vr;
1104         struct vif_device *vif;
1105         struct mfc_cache *c;
1106         struct net *net = sock_net(sk);
1107
1108         switch (cmd) {
1109         case SIOCGETVIFCNT:
1110                 if (copy_from_user(&vr, arg, sizeof(vr)))
1111                         return -EFAULT;
1112                 if (vr.vifi >= net->ipv4.maxvif)
1113                         return -EINVAL;
1114                 read_lock(&mrt_lock);
1115                 vif = &net->ipv4.vif_table[vr.vifi];
1116                 if (VIF_EXISTS(net, vr.vifi)) {
1117                         vr.icount = vif->pkt_in;
1118                         vr.ocount = vif->pkt_out;
1119                         vr.ibytes = vif->bytes_in;
1120                         vr.obytes = vif->bytes_out;
1121                         read_unlock(&mrt_lock);
1122
1123                         if (copy_to_user(arg, &vr, sizeof(vr)))
1124                                 return -EFAULT;
1125                         return 0;
1126                 }
1127                 read_unlock(&mrt_lock);
1128                 return -EADDRNOTAVAIL;
1129         case SIOCGETSGCNT:
1130                 if (copy_from_user(&sr, arg, sizeof(sr)))
1131                         return -EFAULT;
1132
1133                 read_lock(&mrt_lock);
1134                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1135                 if (c) {
1136                         sr.pktcnt = c->mfc_un.res.pkt;
1137                         sr.bytecnt = c->mfc_un.res.bytes;
1138                         sr.wrong_if = c->mfc_un.res.wrong_if;
1139                         read_unlock(&mrt_lock);
1140
1141                         if (copy_to_user(arg, &sr, sizeof(sr)))
1142                                 return -EFAULT;
1143                         return 0;
1144                 }
1145                 read_unlock(&mrt_lock);
1146                 return -EADDRNOTAVAIL;
1147         default:
1148                 return -ENOIOCTLCMD;
1149         }
1150 }
1151
1152
1153 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1154 {
1155         struct net_device *dev = ptr;
1156         struct net *net = dev_net(dev);
1157         struct vif_device *v;
1158         int ct;
1159
1160         if (!net_eq(dev_net(dev), net))
1161                 return NOTIFY_DONE;
1162
1163         if (event != NETDEV_UNREGISTER)
1164                 return NOTIFY_DONE;
1165         v = &net->ipv4.vif_table[0];
1166         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1167                 if (v->dev == dev)
1168                         vif_delete(net, ct, 1);
1169         }
1170         return NOTIFY_DONE;
1171 }
1172
1173
/* Notifier block that routes netdevice events to ipmr_device_event(). */
static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};
1177
1178 /*
1179  *      Encapsulate a packet by attaching a valid IPIP header to it.
1180  *      This avoids tunnel drivers and other mess and gives us the speed so
1181  *      important for multicast video.
1182  */
1183
1184 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1185 {
1186         struct iphdr *iph;
1187         struct iphdr *old_iph = ip_hdr(skb);
1188
1189         skb_push(skb, sizeof(struct iphdr));
1190         skb->transport_header = skb->network_header;
1191         skb_reset_network_header(skb);
1192         iph = ip_hdr(skb);
1193
1194         iph->version    =       4;
1195         iph->tos        =       old_iph->tos;
1196         iph->ttl        =       old_iph->ttl;
1197         iph->frag_off   =       0;
1198         iph->daddr      =       daddr;
1199         iph->saddr      =       saddr;
1200         iph->protocol   =       IPPROTO_IPIP;
1201         iph->ihl        =       5;
1202         iph->tot_len    =       htons(skb->len);
1203         ip_select_ident(iph, skb_dst(skb), NULL);
1204         ip_send_check(iph);
1205
1206         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1207         nf_reset(skb);
1208 }
1209
1210 static inline int ipmr_forward_finish(struct sk_buff *skb)
1211 {
1212         struct ip_options * opt = &(IPCB(skb)->opt);
1213
1214         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1215
1216         if (unlikely(opt->optlen))
1217                 ip_forward_options(skb);
1218
1219         return dst_output(skb);
1220 }
1221
1222 /*
1223  *      Processing handlers for ipmr_forward
1224  */
1225
1226 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1227 {
1228         struct net *net = mfc_net(c);
1229         const struct iphdr *iph = ip_hdr(skb);
1230         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1231         struct net_device *dev;
1232         struct rtable *rt;
1233         int    encap = 0;
1234
1235         if (vif->dev == NULL)
1236                 goto out_free;
1237
1238 #ifdef CONFIG_IP_PIMSM
1239         if (vif->flags & VIFF_REGISTER) {
1240                 vif->pkt_out++;
1241                 vif->bytes_out += skb->len;
1242                 vif->dev->stats.tx_bytes += skb->len;
1243                 vif->dev->stats.tx_packets++;
1244                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1245                 goto out_free;
1246         }
1247 #endif
1248
1249         if (vif->flags&VIFF_TUNNEL) {
1250                 struct flowi fl = { .oif = vif->link,
1251                                     .nl_u = { .ip4_u =
1252                                               { .daddr = vif->remote,
1253                                                 .saddr = vif->local,
1254                                                 .tos = RT_TOS(iph->tos) } },
1255                                     .proto = IPPROTO_IPIP };
1256                 if (ip_route_output_key(net, &rt, &fl))
1257                         goto out_free;
1258                 encap = sizeof(struct iphdr);
1259         } else {
1260                 struct flowi fl = { .oif = vif->link,
1261                                     .nl_u = { .ip4_u =
1262                                               { .daddr = iph->daddr,
1263                                                 .tos = RT_TOS(iph->tos) } },
1264                                     .proto = IPPROTO_IPIP };
1265                 if (ip_route_output_key(net, &rt, &fl))
1266                         goto out_free;
1267         }
1268
1269         dev = rt->u.dst.dev;
1270
1271         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1272                 /* Do not fragment multicasts. Alas, IPv4 does not
1273                    allow to send ICMP, so that packets will disappear
1274                    to blackhole.
1275                  */
1276
1277                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1278                 ip_rt_put(rt);
1279                 goto out_free;
1280         }
1281
1282         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1283
1284         if (skb_cow(skb, encap)) {
1285                 ip_rt_put(rt);
1286                 goto out_free;
1287         }
1288
1289         vif->pkt_out++;
1290         vif->bytes_out += skb->len;
1291
1292         skb_dst_drop(skb);
1293         skb_dst_set(skb, &rt->u.dst);
1294         ip_decrease_ttl(ip_hdr(skb));
1295
1296         /* FIXME: forward and output firewalls used to be called here.
1297          * What do we do with netfilter? -- RR */
1298         if (vif->flags & VIFF_TUNNEL) {
1299                 ip_encap(skb, vif->local, vif->remote);
1300                 /* FIXME: extra output firewall step used to be here. --RR */
1301                 vif->dev->stats.tx_packets++;
1302                 vif->dev->stats.tx_bytes += skb->len;
1303         }
1304
1305         IPCB(skb)->flags |= IPSKB_FORWARDED;
1306
1307         /*
1308          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1309          * not only before forwarding, but after forwarding on all output
1310          * interfaces. It is clear, if mrouter runs a multicasting
1311          * program, it should receive packets not depending to what interface
1312          * program is joined.
1313          * If we will not make it, the program will have to join on all
1314          * interfaces. On the other hand, multihoming host (or router, but
1315          * not mrouter) cannot join to more than one interface - it will
1316          * result in receiving multiple packets.
1317          */
1318         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1319                 ipmr_forward_finish);
1320         return;
1321
1322 out_free:
1323         kfree_skb(skb);
1324         return;
1325 }
1326
1327 static int ipmr_find_vif(struct net_device *dev)
1328 {
1329         struct net *net = dev_net(dev);
1330         int ct;
1331         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1332                 if (net->ipv4.vif_table[ct].dev == dev)
1333                         break;
1334         }
1335         return ct;
1336 }
1337
1338 /* "local" means that we should preserve one skb (for local delivery) */
1339
1340 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1341 {
1342         int psend = -1;
1343         int vif, ct;
1344         struct net *net = mfc_net(cache);
1345
1346         vif = cache->mfc_parent;
1347         cache->mfc_un.res.pkt++;
1348         cache->mfc_un.res.bytes += skb->len;
1349
1350         /*
1351          * Wrong interface: drop packet and (maybe) send PIM assert.
1352          */
1353         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1354                 int true_vifi;
1355
1356                 if (skb_rtable(skb)->fl.iif == 0) {
1357                         /* It is our own packet, looped back.
1358                            Very complicated situation...
1359
1360                            The best workaround until routing daemons will be
1361                            fixed is not to redistribute packet, if it was
1362                            send through wrong interface. It means, that
1363                            multicast applications WILL NOT work for
1364                            (S,G), which have default multicast route pointing
1365                            to wrong oif. In any case, it is not a good
1366                            idea to use multicasting applications on router.
1367                          */
1368                         goto dont_forward;
1369                 }
1370
1371                 cache->mfc_un.res.wrong_if++;
1372                 true_vifi = ipmr_find_vif(skb->dev);
1373
1374                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1375                     /* pimsm uses asserts, when switching from RPT to SPT,
1376                        so that we cannot check that packet arrived on an oif.
1377                        It is bad, but otherwise we would need to move pretty
1378                        large chunk of pimd to kernel. Ough... --ANK
1379                      */
1380                     (net->ipv4.mroute_do_pim ||
1381                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1382                     time_after(jiffies,
1383                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1384                         cache->mfc_un.res.last_assert = jiffies;
1385                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1386                 }
1387                 goto dont_forward;
1388         }
1389
1390         net->ipv4.vif_table[vif].pkt_in++;
1391         net->ipv4.vif_table[vif].bytes_in += skb->len;
1392
1393         /*
1394          *      Forward the frame
1395          */
1396         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1397                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1398                         if (psend != -1) {
1399                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1400                                 if (skb2)
1401                                         ipmr_queue_xmit(skb2, cache, psend);
1402                         }
1403                         psend = ct;
1404                 }
1405         }
1406         if (psend != -1) {
1407                 if (local) {
1408                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1409                         if (skb2)
1410                                 ipmr_queue_xmit(skb2, cache, psend);
1411                 } else {
1412                         ipmr_queue_xmit(skb, cache, psend);
1413                         return 0;
1414                 }
1415         }
1416
1417 dont_forward:
1418         if (!local)
1419                 kfree_skb(skb);
1420         return 0;
1421 }
1422
1423
1424 /*
1425  *      Multicast packets for forwarding arrive here
1426  */
1427
1428 int ip_mr_input(struct sk_buff *skb)
1429 {
1430         struct mfc_cache *cache;
1431         struct net *net = dev_net(skb->dev);
1432         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1433
1434         /* Packet is looped back after forward, it should not be
1435            forwarded second time, but still can be delivered locally.
1436          */
1437         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1438                 goto dont_forward;
1439
1440         if (!local) {
1441                     if (IPCB(skb)->opt.router_alert) {
1442                             if (ip_call_ra_chain(skb))
1443                                     return 0;
1444                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1445                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1446                                Cisco IOS <= 11.2(8)) do not put router alert
1447                                option to IGMP packets destined to routable
1448                                groups. It is very bad, because it means
1449                                that we can forward NO IGMP messages.
1450                              */
1451                             read_lock(&mrt_lock);
1452                             if (net->ipv4.mroute_sk) {
1453                                     nf_reset(skb);
1454                                     raw_rcv(net->ipv4.mroute_sk, skb);
1455                                     read_unlock(&mrt_lock);
1456                                     return 0;
1457                             }
1458                             read_unlock(&mrt_lock);
1459                     }
1460         }
1461
1462         read_lock(&mrt_lock);
1463         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1464
1465         /*
1466          *      No usable cache entry
1467          */
1468         if (cache == NULL) {
1469                 int vif;
1470
1471                 if (local) {
1472                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1473                         ip_local_deliver(skb);
1474                         if (skb2 == NULL) {
1475                                 read_unlock(&mrt_lock);
1476                                 return -ENOBUFS;
1477                         }
1478                         skb = skb2;
1479                 }
1480
1481                 vif = ipmr_find_vif(skb->dev);
1482                 if (vif >= 0) {
1483                         int err = ipmr_cache_unresolved(net, vif, skb);
1484                         read_unlock(&mrt_lock);
1485
1486                         return err;
1487                 }
1488                 read_unlock(&mrt_lock);
1489                 kfree_skb(skb);
1490                 return -ENODEV;
1491         }
1492
1493         ip_mr_forward(skb, cache, local);
1494
1495         read_unlock(&mrt_lock);
1496
1497         if (local)
1498                 return ip_local_deliver(skb);
1499
1500         return 0;
1501
1502 dont_forward:
1503         if (local)
1504                 return ip_local_deliver(skb);
1505         kfree_skb(skb);
1506         return 0;
1507 }
1508
1509 #ifdef CONFIG_IP_PIMSM
1510 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1511 {
1512         struct net_device *reg_dev = NULL;
1513         struct iphdr *encap;
1514         struct net *net = dev_net(skb->dev);
1515
1516         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1517         /*
1518            Check that:
1519            a. packet is really destinted to a multicast group
1520            b. packet is not a NULL-REGISTER
1521            c. packet is not truncated
1522          */
1523         if (!ipv4_is_multicast(encap->daddr) ||
1524             encap->tot_len == 0 ||
1525             ntohs(encap->tot_len) + pimlen > skb->len)
1526                 return 1;
1527
1528         read_lock(&mrt_lock);
1529         if (net->ipv4.mroute_reg_vif_num >= 0)
1530                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1531         if (reg_dev)
1532                 dev_hold(reg_dev);
1533         read_unlock(&mrt_lock);
1534
1535         if (reg_dev == NULL)
1536                 return 1;
1537
1538         skb->mac_header = skb->network_header;
1539         skb_pull(skb, (u8*)encap - skb->data);
1540         skb_reset_network_header(skb);
1541         skb->dev = reg_dev;
1542         skb->protocol = htons(ETH_P_IP);
1543         skb->ip_summed = 0;
1544         skb->pkt_type = PACKET_HOST;
1545         skb_dst_drop(skb);
1546         reg_dev->stats.rx_bytes += skb->len;
1547         reg_dev->stats.rx_packets++;
1548         nf_reset(skb);
1549         netif_rx(skb);
1550         dev_put(reg_dev);
1551
1552         return 0;
1553 }
1554 #endif
1555
1556 #ifdef CONFIG_IP_PIMSM_V1
1557 /*
1558  * Handle IGMP messages of PIMv1
1559  */
1560
1561 int pim_rcv_v1(struct sk_buff * skb)
1562 {
1563         struct igmphdr *pim;
1564         struct net *net = dev_net(skb->dev);
1565
1566         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1567                 goto drop;
1568
1569         pim = igmp_hdr(skb);
1570
1571         if (!net->ipv4.mroute_do_pim ||
1572             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1573                 goto drop;
1574
1575         if (__pim_rcv(skb, sizeof(*pim))) {
1576 drop:
1577                 kfree_skb(skb);
1578         }
1579         return 0;
1580 }
1581 #endif
1582
1583 #ifdef CONFIG_IP_PIMSM_V2
1584 static int pim_rcv(struct sk_buff * skb)
1585 {
1586         struct pimreghdr *pim;
1587
1588         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1589                 goto drop;
1590
1591         pim = (struct pimreghdr *)skb_transport_header(skb);
1592         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1593             (pim->flags&PIM_NULL_REGISTER) ||
1594             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1595              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1596                 goto drop;
1597
1598         if (__pim_rcv(skb, sizeof(*pim))) {
1599 drop:
1600                 kfree_skb(skb);
1601         }
1602         return 0;
1603 }
1604 #endif
1605
1606 static int
1607 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1608 {
1609         int ct;
1610         struct rtnexthop *nhp;
1611         struct net *net = mfc_net(c);
1612         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1613         u8 *b = skb_tail_pointer(skb);
1614         struct rtattr *mp_head;
1615
1616         if (dev)
1617                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1618
1619         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1620
1621         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1622                 if (c->mfc_un.res.ttls[ct] < 255) {
1623                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1624                                 goto rtattr_failure;
1625                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1626                         nhp->rtnh_flags = 0;
1627                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1628                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1629                         nhp->rtnh_len = sizeof(*nhp);
1630                 }
1631         }
1632         mp_head->rta_type = RTA_MULTIPATH;
1633         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1634         rtm->rtm_type = RTN_MULTICAST;
1635         return 1;
1636
1637 rtattr_failure:
1638         nlmsg_trim(skb, b);
1639         return -EMSGSIZE;
1640 }
1641
1642 int ipmr_get_route(struct net *net,
1643                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1644 {
1645         int err;
1646         struct mfc_cache *cache;
1647         struct rtable *rt = skb_rtable(skb);
1648
1649         read_lock(&mrt_lock);
1650         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1651
1652         if (cache == NULL) {
1653                 struct sk_buff *skb2;
1654                 struct iphdr *iph;
1655                 struct net_device *dev;
1656                 int vif;
1657
1658                 if (nowait) {
1659                         read_unlock(&mrt_lock);
1660                         return -EAGAIN;
1661                 }
1662
1663                 dev = skb->dev;
1664                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1665                         read_unlock(&mrt_lock);
1666                         return -ENODEV;
1667                 }
1668                 skb2 = skb_clone(skb, GFP_ATOMIC);
1669                 if (!skb2) {
1670                         read_unlock(&mrt_lock);
1671                         return -ENOMEM;
1672                 }
1673
1674                 skb_push(skb2, sizeof(struct iphdr));
1675                 skb_reset_network_header(skb2);
1676                 iph = ip_hdr(skb2);
1677                 iph->ihl = sizeof(struct iphdr) >> 2;
1678                 iph->saddr = rt->rt_src;
1679                 iph->daddr = rt->rt_dst;
1680                 iph->version = 0;
1681                 err = ipmr_cache_unresolved(net, vif, skb2);
1682                 read_unlock(&mrt_lock);
1683                 return err;
1684         }
1685
1686         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1687                 cache->mfc_flags |= MFC_NOTIFY;
1688         err = ipmr_fill_mroute(skb, cache, rtm);
1689         read_unlock(&mrt_lock);
1690         return err;
1691 }
1692
1693 #ifdef CONFIG_PROC_FS
1694 /*
1695  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1696  */
/* Per-open iterator state for the vif seq_file. */
struct ipmr_vif_iter {
        struct seq_net_private p;       /* per-net seq_file private data */
        int ct;                         /* index of the current vif_table slot */
};
1701
1702 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1703                                            struct ipmr_vif_iter *iter,
1704                                            loff_t pos)
1705 {
1706         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1707                 if (!VIF_EXISTS(net, iter->ct))
1708                         continue;
1709                 if (pos-- == 0)
1710                         return &net->ipv4.vif_table[iter->ct];
1711         }
1712         return NULL;
1713 }
1714
1715 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1716         __acquires(mrt_lock)
1717 {
1718         struct net *net = seq_file_net(seq);
1719
1720         read_lock(&mrt_lock);
1721         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1722                 : SEQ_START_TOKEN;
1723 }
1724
1725 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1726 {
1727         struct ipmr_vif_iter *iter = seq->private;
1728         struct net *net = seq_file_net(seq);
1729
1730         ++*pos;
1731         if (v == SEQ_START_TOKEN)
1732                 return ipmr_vif_seq_idx(net, iter, 0);
1733
1734         while (++iter->ct < net->ipv4.maxvif) {
1735                 if (!VIF_EXISTS(net, iter->ct))
1736                         continue;
1737                 return &net->ipv4.vif_table[iter->ct];
1738         }
1739         return NULL;
1740 }
1741
/* seq_file ->stop: release the mrt_lock read lock taken by ->start. */
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
        __releases(mrt_lock)
{
        read_unlock(&mrt_lock);
}
1747
1748 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1749 {
1750         struct net *net = seq_file_net(seq);
1751
1752         if (v == SEQ_START_TOKEN) {
1753                 seq_puts(seq,
1754                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1755         } else {
1756                 const struct vif_device *vif = v;
1757                 const char *name =  vif->dev ? vif->dev->name : "none";
1758
1759                 seq_printf(seq,
1760                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1761                            vif - net->ipv4.vif_table,
1762                            name, vif->bytes_in, vif->pkt_in,
1763                            vif->bytes_out, vif->pkt_out,
1764                            vif->flags, vif->local, vif->remote);
1765         }
1766         return 0;
1767 }
1768
/*
 * seq_file iterator callbacks for the "ip_mr_vif" proc file;
 * handed to seq_open_net() by ipmr_vif_open().
 */
static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
        .next  = ipmr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
};
1775
1776 static int ipmr_vif_open(struct inode *inode, struct file *file)
1777 {
1778         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1779                             sizeof(struct ipmr_vif_iter));
1780 }
1781
/*
 * File operations for the "ip_mr_vif" proc entry (registered in
 * ipmr_net_init()).  Reading and seeking go through the generic seq_file
 * helpers; release uses the _net variant to match seq_open_net().
 */
static const struct file_operations ipmr_vif_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_vif_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
1789
/*
 * Private seq_file state for the "ip_mr_cache" proc file.
 * Allocated by seq_open_net() in ipmr_mfc_open() and reached through
 * seq->private by the ipmr_mfc_seq_* callbacks.
 */
struct ipmr_mfc_iter {
        /* NOTE(review): presumably must stay the first member so that
         * seq_file_net() can find the namespace -- confirm.
         */
        struct seq_net_private p;
        /*
         * Which list is currently being walked, and therefore which lock
         * is held: net->ipv4.mfc_cache_array (mrt_lock read-held),
         * &mfc_unres_queue (mfc_unres_lock held), or NULL (no lock held).
         * ipmr_mfc_seq_stop() relies on this to release the right lock.
         */
        struct mfc_cache **cache;
        /* current hash line in mfc_cache_array; reset to 0 for the queue */
        int ct;
};
1795
1796
1797 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1798                                           struct ipmr_mfc_iter *it, loff_t pos)
1799 {
1800         struct mfc_cache *mfc;
1801
1802         it->cache = net->ipv4.mfc_cache_array;
1803         read_lock(&mrt_lock);
1804         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1805                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1806                      mfc; mfc = mfc->next)
1807                         if (pos-- == 0)
1808                                 return mfc;
1809         read_unlock(&mrt_lock);
1810
1811         it->cache = &mfc_unres_queue;
1812         spin_lock_bh(&mfc_unres_lock);
1813         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1814                 if (net_eq(mfc_net(mfc), net) &&
1815                     pos-- == 0)
1816                         return mfc;
1817         spin_unlock_bh(&mfc_unres_lock);
1818
1819         it->cache = NULL;
1820         return NULL;
1821 }
1822
1823
1824 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1825 {
1826         struct ipmr_mfc_iter *it = seq->private;
1827         struct net *net = seq_file_net(seq);
1828
1829         it->cache = NULL;
1830         it->ct = 0;
1831         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1832                 : SEQ_START_TOKEN;
1833 }
1834
1835 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1836 {
1837         struct mfc_cache *mfc = v;
1838         struct ipmr_mfc_iter *it = seq->private;
1839         struct net *net = seq_file_net(seq);
1840
1841         ++*pos;
1842
1843         if (v == SEQ_START_TOKEN)
1844                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1845
1846         if (mfc->next)
1847                 return mfc->next;
1848
1849         if (it->cache == &mfc_unres_queue)
1850                 goto end_of_list;
1851
1852         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1853
1854         while (++it->ct < MFC_LINES) {
1855                 mfc = net->ipv4.mfc_cache_array[it->ct];
1856                 if (mfc)
1857                         return mfc;
1858         }
1859
1860         /* exhausted cache_array, show unresolved */
1861         read_unlock(&mrt_lock);
1862         it->cache = &mfc_unres_queue;
1863         it->ct = 0;
1864
1865         spin_lock_bh(&mfc_unres_lock);
1866         mfc = mfc_unres_queue;
1867         while (mfc && !net_eq(mfc_net(mfc), net))
1868                 mfc = mfc->next;
1869         if (mfc)
1870                 return mfc;
1871
1872  end_of_list:
1873         spin_unlock_bh(&mfc_unres_lock);
1874         it->cache = NULL;
1875
1876         return NULL;
1877 }
1878
1879 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1880 {
1881         struct ipmr_mfc_iter *it = seq->private;
1882         struct net *net = seq_file_net(seq);
1883
1884         if (it->cache == &mfc_unres_queue)
1885                 spin_unlock_bh(&mfc_unres_lock);
1886         else if (it->cache == net->ipv4.mfc_cache_array)
1887                 read_unlock(&mrt_lock);
1888 }
1889
1890 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1891 {
1892         int n;
1893         struct net *net = seq_file_net(seq);
1894
1895         if (v == SEQ_START_TOKEN) {
1896                 seq_puts(seq,
1897                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1898         } else {
1899                 const struct mfc_cache *mfc = v;
1900                 const struct ipmr_mfc_iter *it = seq->private;
1901
1902                 seq_printf(seq, "%08lX %08lX %-3hd",
1903                            (unsigned long) mfc->mfc_mcastgrp,
1904                            (unsigned long) mfc->mfc_origin,
1905                            mfc->mfc_parent);
1906
1907                 if (it->cache != &mfc_unres_queue) {
1908                         seq_printf(seq, " %8lu %8lu %8lu",
1909                                    mfc->mfc_un.res.pkt,
1910                                    mfc->mfc_un.res.bytes,
1911                                    mfc->mfc_un.res.wrong_if);
1912                         for (n = mfc->mfc_un.res.minvif;
1913                              n < mfc->mfc_un.res.maxvif; n++ ) {
1914                                 if (VIF_EXISTS(net, n) &&
1915                                     mfc->mfc_un.res.ttls[n] < 255)
1916                                         seq_printf(seq,
1917                                            " %2d:%-3d",
1918                                            n, mfc->mfc_un.res.ttls[n]);
1919                         }
1920                 } else {
1921                         /* unresolved mfc_caches don't contain
1922                          * pkt, bytes and wrong_if values
1923                          */
1924                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1925                 }
1926                 seq_putc(seq, '\n');
1927         }
1928         return 0;
1929 }
1930
/*
 * seq_file iterator callbacks for the "ip_mr_cache" proc file;
 * handed to seq_open_net() by ipmr_mfc_open().
 */
static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
        .next  = ipmr_mfc_seq_next,
        .stop  = ipmr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
};
1937
1938 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1939 {
1940         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1941                             sizeof(struct ipmr_mfc_iter));
1942 }
1943
/*
 * File operations for the "ip_mr_cache" proc entry (registered in
 * ipmr_net_init()).  Reading and seeking go through the generic seq_file
 * helpers; release uses the _net variant to match seq_open_net().
 */
static const struct file_operations ipmr_mfc_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_mfc_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif
1951 #endif
1952
1953 #ifdef CONFIG_IP_PIMSM_V2
/*
 * Protocol handler descriptor for PIM: incoming packets are passed to
 * pim_rcv().  Registered for IPPROTO_PIM by ip_mr_init().
 * NOTE(review): .netns_ok presumably marks the handler as safe to run
 * for packets in non-initial network namespaces -- confirm.
 */
static const struct net_protocol pim_protocol = {
        .handler        =       pim_rcv,
        .netns_ok       =       1,
};
1958 #endif
1959
1960
1961 /*
1962  *      Setup for IP multicast routing
1963  */
1964 static int __net_init ipmr_net_init(struct net *net)
1965 {
1966         int err = 0;
1967
1968         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1969                                       GFP_KERNEL);
1970         if (!net->ipv4.vif_table) {
1971                 err = -ENOMEM;
1972                 goto fail;
1973         }
1974
1975         /* Forwarding cache */
1976         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1977                                             sizeof(struct mfc_cache *),
1978                                             GFP_KERNEL);
1979         if (!net->ipv4.mfc_cache_array) {
1980                 err = -ENOMEM;
1981                 goto fail_mfc_cache;
1982         }
1983
1984 #ifdef CONFIG_IP_PIMSM
1985         net->ipv4.mroute_reg_vif_num = -1;
1986 #endif
1987
1988 #ifdef CONFIG_PROC_FS
1989         err = -ENOMEM;
1990         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1991                 goto proc_vif_fail;
1992         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1993                 goto proc_cache_fail;
1994 #endif
1995         return 0;
1996
1997 #ifdef CONFIG_PROC_FS
1998 proc_cache_fail:
1999         proc_net_remove(net, "ip_mr_vif");
2000 proc_vif_fail:
2001         kfree(net->ipv4.mfc_cache_array);
2002 #endif
2003 fail_mfc_cache:
2004         kfree(net->ipv4.vif_table);
2005 fail:
2006         return err;
2007 }
2008
/*
 * Per-namespace teardown, the mirror image of ipmr_net_init(): remove
 * the two proc entries (when CONFIG_PROC_FS is set), then free the
 * forwarding-cache array and the vif table.
 */
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "ip_mr_cache");
        proc_net_remove(net, "ip_mr_vif");
#endif
        kfree(net->ipv4.mfc_cache_array);
        kfree(net->ipv4.vif_table);
}
2018
/*
 * Per-network-namespace hooks for multicast routing; registered with
 * register_pernet_subsys() in ip_mr_init().
 */
static struct pernet_operations ipmr_net_ops = {
        .init = ipmr_net_init,
        .exit = ipmr_net_exit,
};
2023
/*
 * One-time initialisation of IP multicast routing.
 *
 * Creates the slab cache for struct mfc_cache, registers the
 * per-namespace operations, prepares the expiry timer, registers the
 * netdevice notifier and, with CONFIG_IP_PIMSM_V2, the PIM protocol
 * handler.
 *
 * Returns 0 on success.  On failure returns the error from the failing
 * registration (-ENOMEM for the slab cache, -EAGAIN for the PIM
 * handler) after undoing the earlier steps in reverse order.
 */
int __init ip_mr_init(void)
{
        int err;

        /* NOTE(review): with SLAB_PANIC the NULL check below is presumably
         * never taken -- confirm.
         */
        mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                       sizeof(struct mfc_cache),
                                       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                       NULL);
        if (!mrt_cachep)
                return -ENOMEM;

        err = register_pernet_subsys(&ipmr_net_ops);
        if (err)
                goto reg_pernet_fail;

        /* The timer is only initialised here, not armed. */
        setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
        err = register_netdevice_notifier(&ip_mr_notifier);
        if (err)
                goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
        if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
                printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
                err = -EAGAIN;
                goto add_proto_fail;
        }
#endif
        return 0;

        /* Error unwinding: each label undoes the steps completed before it. */
#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
        unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
        del_timer(&ipmr_expire_timer);
        unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
        kmem_cache_destroy(mrt_cachep);
        return err;
}