net/ipv4/ipmr.c
1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 #include <net/fib_rules.h>
68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69 #define CONFIG_IP_PIMSM 1
70 #endif
72 struct mr_table {
73 struct list_head list;
74 #ifdef CONFIG_NET_NS
75 struct net *net;
76 #endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89 #endif
92 struct ipmr_rule {
93 struct fib_rule common;
96 struct ipmr_result {
97 struct mr_table *mrt;
100 /* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock.
104 static DEFINE_RWLOCK(mrt_lock);
107 * Multicast router control variables
110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
112 /* Special spinlock for queue of unresolved entries */
113 static DEFINE_SPINLOCK(mfc_unres_lock);
 115 /* We return to Alan's original scheme. The hash table of resolved
116 entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock.
 120    In this case the data path is entirely free of exclusive locks.
123 static struct kmem_cache *mrt_cachep __read_mostly;
125 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129 static int ipmr_cache_report(struct mr_table *mrt,
130 struct sk_buff *pkt, vifi_t vifi, int assert);
131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133 static void ipmr_expire_process(unsigned long arg);
135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136 #define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
139 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
141 struct mr_table *mrt;
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
147 return NULL;
150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
191 return 1;
194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
198 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
201 return 0;
204 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
207 return 1;
210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
219 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 .family = RTNL_FAMILY_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
234 static int __net_init ipmr_rules_init(struct net *net)
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
259 err2:
260 kfree(mrt);
261 err1:
262 fib_rules_unregister(ops);
263 return err;
266 static void __net_exit ipmr_rules_exit(struct net *net)
268 struct mr_table *mrt, *next;
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271 list_del(&mrt->list);
272 kfree(mrt);
274 fib_rules_unregister(net->ipv4.mr_rules_ops);
276 #else
277 #define ipmr_for_each_table(mrt, net) \
278 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
280 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
282 return net->ipv4.mrt;
285 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
286 struct mr_table **mrt)
288 *mrt = net->ipv4.mrt;
289 return 0;
292 static int __net_init ipmr_rules_init(struct net *net)
294 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295 return net->ipv4.mrt ? 0 : -ENOMEM;
298 static void __net_exit ipmr_rules_exit(struct net *net)
300 kfree(net->ipv4.mrt);
302 #endif
304 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
306 struct mr_table *mrt;
307 unsigned int i;
309 mrt = ipmr_get_table(net, id);
310 if (mrt != NULL)
311 return mrt;
313 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 if (mrt == NULL)
315 return NULL;
316 write_pnet(&mrt->net, net);
317 mrt->id = id;
319 /* Forwarding cache */
320 for (i = 0; i < MFC_LINES; i++)
321 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
323 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
325 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 (unsigned long)mrt);
328 #ifdef CONFIG_IP_PIMSM
329 mrt->mroute_reg_vif_num = -1;
330 #endif
331 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
332 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
333 #endif
334 return mrt;
337 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
339 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
341 struct net *net = dev_net(dev);
343 dev_close(dev);
345 dev = __dev_get_by_name(net, "tunl0");
346 if (dev) {
347 const struct net_device_ops *ops = dev->netdev_ops;
348 struct ifreq ifr;
349 struct ip_tunnel_parm p;
351 memset(&p, 0, sizeof(p));
352 p.iph.daddr = v->vifc_rmt_addr.s_addr;
353 p.iph.saddr = v->vifc_lcl_addr.s_addr;
354 p.iph.version = 4;
355 p.iph.ihl = 5;
356 p.iph.protocol = IPPROTO_IPIP;
357 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
358 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
360 if (ops->ndo_do_ioctl) {
361 mm_segment_t oldfs = get_fs();
363 set_fs(KERNEL_DS);
364 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
365 set_fs(oldfs);
370 static
371 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
373 struct net_device *dev;
375 dev = __dev_get_by_name(net, "tunl0");
377 if (dev) {
378 const struct net_device_ops *ops = dev->netdev_ops;
379 int err;
380 struct ifreq ifr;
381 struct ip_tunnel_parm p;
382 struct in_device *in_dev;
384 memset(&p, 0, sizeof(p));
385 p.iph.daddr = v->vifc_rmt_addr.s_addr;
386 p.iph.saddr = v->vifc_lcl_addr.s_addr;
387 p.iph.version = 4;
388 p.iph.ihl = 5;
389 p.iph.protocol = IPPROTO_IPIP;
390 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
391 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
393 if (ops->ndo_do_ioctl) {
394 mm_segment_t oldfs = get_fs();
396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs);
399 } else
400 err = -EOPNOTSUPP;
402 dev = NULL;
404 if (err == 0 &&
405 (dev = __dev_get_by_name(net, p.name)) != NULL) {
406 dev->flags |= IFF_MULTICAST;
408 in_dev = __in_dev_get_rtnl(dev);
409 if (in_dev == NULL)
410 goto failure;
412 ipv4_devconf_setall(in_dev);
413 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
415 if (dev_open(dev))
416 goto failure;
417 dev_hold(dev);
420 return dev;
422 failure:
423 /* allow the register to be completed before unregistering. */
424 rtnl_unlock();
425 rtnl_lock();
427 unregister_netdevice(dev);
428 return NULL;
431 #ifdef CONFIG_IP_PIMSM
433 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
435 struct net *net = dev_net(dev);
436 struct mr_table *mrt;
437 struct flowi fl = {
438 .oif = dev->ifindex,
439 .iif = skb->skb_iif,
440 .mark = skb->mark,
442 int err;
444 err = ipmr_fib_lookup(net, &fl, &mrt);
445 if (err < 0)
446 return err;
448 read_lock(&mrt_lock);
449 dev->stats.tx_bytes += skb->len;
450 dev->stats.tx_packets++;
451 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
452 read_unlock(&mrt_lock);
453 kfree_skb(skb);
454 return NETDEV_TX_OK;
457 static const struct net_device_ops reg_vif_netdev_ops = {
458 .ndo_start_xmit = reg_vif_xmit,
461 static void reg_vif_setup(struct net_device *dev)
463 dev->type = ARPHRD_PIMREG;
464 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
465 dev->flags = IFF_NOARP;
466 dev->netdev_ops = &reg_vif_netdev_ops,
467 dev->destructor = free_netdev;
468 dev->features |= NETIF_F_NETNS_LOCAL;
471 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
473 struct net_device *dev;
474 struct in_device *in_dev;
475 char name[IFNAMSIZ];
477 if (mrt->id == RT_TABLE_DEFAULT)
478 sprintf(name, "pimreg");
479 else
480 sprintf(name, "pimreg%u", mrt->id);
482 dev = alloc_netdev(0, name, reg_vif_setup);
484 if (dev == NULL)
485 return NULL;
487 dev_net_set(dev, net);
489 if (register_netdevice(dev)) {
490 free_netdev(dev);
491 return NULL;
493 dev->iflink = 0;
495 rcu_read_lock();
496 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
497 rcu_read_unlock();
498 goto failure;
501 ipv4_devconf_setall(in_dev);
502 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
503 rcu_read_unlock();
505 if (dev_open(dev))
506 goto failure;
508 dev_hold(dev);
510 return dev;
512 failure:
513 /* allow the register to be completed before unregistering. */
514 rtnl_unlock();
515 rtnl_lock();
517 unregister_netdevice(dev);
518 return NULL;
520 #endif
523 * Delete a VIF entry
524 * @notify: Set to 1, if the caller is a notifier_call
527 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
528 struct list_head *head)
530 struct vif_device *v;
531 struct net_device *dev;
532 struct in_device *in_dev;
534 if (vifi < 0 || vifi >= mrt->maxvif)
535 return -EADDRNOTAVAIL;
537 v = &mrt->vif_table[vifi];
539 write_lock_bh(&mrt_lock);
540 dev = v->dev;
541 v->dev = NULL;
543 if (!dev) {
544 write_unlock_bh(&mrt_lock);
545 return -EADDRNOTAVAIL;
548 #ifdef CONFIG_IP_PIMSM
549 if (vifi == mrt->mroute_reg_vif_num)
550 mrt->mroute_reg_vif_num = -1;
551 #endif
553 if (vifi+1 == mrt->maxvif) {
554 int tmp;
555 for (tmp=vifi-1; tmp>=0; tmp--) {
556 if (VIF_EXISTS(mrt, tmp))
557 break;
559 mrt->maxvif = tmp+1;
562 write_unlock_bh(&mrt_lock);
564 dev_set_allmulti(dev, -1);
566 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
567 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
568 ip_rt_multicast_event(in_dev);
571 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
572 unregister_netdevice_queue(dev, head);
574 dev_put(dev);
575 return 0;
578 static inline void ipmr_cache_free(struct mfc_cache *c)
580 kmem_cache_free(mrt_cachep, c);
583 /* Destroy an unresolved cache entry, killing queued skbs
584 and reporting error to netlink readers.
587 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
589 struct net *net = read_pnet(&mrt->net);
590 struct sk_buff *skb;
591 struct nlmsgerr *e;
593 atomic_dec(&mrt->cache_resolve_queue_len);
595 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
596 if (ip_hdr(skb)->version == 0) {
597 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
598 nlh->nlmsg_type = NLMSG_ERROR;
599 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
600 skb_trim(skb, nlh->nlmsg_len);
601 e = NLMSG_DATA(nlh);
602 e->error = -ETIMEDOUT;
603 memset(&e->msg, 0, sizeof(e->msg));
605 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
606 } else
607 kfree_skb(skb);
610 ipmr_cache_free(c);
614 /* Timer process for the unresolved queue. */
616 static void ipmr_expire_process(unsigned long arg)
618 struct mr_table *mrt = (struct mr_table *)arg;
619 unsigned long now;
620 unsigned long expires;
621 struct mfc_cache *c, *next;
623 if (!spin_trylock(&mfc_unres_lock)) {
624 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
625 return;
628 if (list_empty(&mrt->mfc_unres_queue))
629 goto out;
631 now = jiffies;
632 expires = 10*HZ;
634 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
635 if (time_after(c->mfc_un.unres.expires, now)) {
636 unsigned long interval = c->mfc_un.unres.expires - now;
637 if (interval < expires)
638 expires = interval;
639 continue;
642 list_del(&c->list);
643 ipmr_destroy_unres(mrt, c);
646 if (!list_empty(&mrt->mfc_unres_queue))
647 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
649 out:
650 spin_unlock(&mfc_unres_lock);
653 /* Fill oifs list. It is called under write locked mrt_lock. */
655 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
656 unsigned char *ttls)
658 int vifi;
660 cache->mfc_un.res.minvif = MAXVIFS;
661 cache->mfc_un.res.maxvif = 0;
662 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
664 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
665 if (VIF_EXISTS(mrt, vifi) &&
666 ttls[vifi] && ttls[vifi] < 255) {
667 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
668 if (cache->mfc_un.res.minvif > vifi)
669 cache->mfc_un.res.minvif = vifi;
670 if (cache->mfc_un.res.maxvif <= vifi)
671 cache->mfc_un.res.maxvif = vifi + 1;
676 static int vif_add(struct net *net, struct mr_table *mrt,
677 struct vifctl *vifc, int mrtsock)
679 int vifi = vifc->vifc_vifi;
680 struct vif_device *v = &mrt->vif_table[vifi];
681 struct net_device *dev;
682 struct in_device *in_dev;
683 int err;
685 /* Is vif busy ? */
686 if (VIF_EXISTS(mrt, vifi))
687 return -EADDRINUSE;
689 switch (vifc->vifc_flags) {
690 #ifdef CONFIG_IP_PIMSM
691 case VIFF_REGISTER:
693 * Special Purpose VIF in PIM
694 * All the packets will be sent to the daemon
696 if (mrt->mroute_reg_vif_num >= 0)
697 return -EADDRINUSE;
698 dev = ipmr_reg_vif(net, mrt);
699 if (!dev)
700 return -ENOBUFS;
701 err = dev_set_allmulti(dev, 1);
702 if (err) {
703 unregister_netdevice(dev);
704 dev_put(dev);
705 return err;
707 break;
708 #endif
709 case VIFF_TUNNEL:
710 dev = ipmr_new_tunnel(net, vifc);
711 if (!dev)
712 return -ENOBUFS;
713 err = dev_set_allmulti(dev, 1);
714 if (err) {
715 ipmr_del_tunnel(dev, vifc);
716 dev_put(dev);
717 return err;
719 break;
721 case VIFF_USE_IFINDEX:
722 case 0:
723 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
724 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
725 if (dev && dev->ip_ptr == NULL) {
726 dev_put(dev);
727 return -EADDRNOTAVAIL;
729 } else
730 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
732 if (!dev)
733 return -EADDRNOTAVAIL;
734 err = dev_set_allmulti(dev, 1);
735 if (err) {
736 dev_put(dev);
737 return err;
739 break;
740 default:
741 return -EINVAL;
744 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
745 dev_put(dev);
746 return -EADDRNOTAVAIL;
748 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
749 ip_rt_multicast_event(in_dev);
752 * Fill in the VIF structures
754 v->rate_limit = vifc->vifc_rate_limit;
755 v->local = vifc->vifc_lcl_addr.s_addr;
756 v->remote = vifc->vifc_rmt_addr.s_addr;
757 v->flags = vifc->vifc_flags;
758 if (!mrtsock)
759 v->flags |= VIFF_STATIC;
760 v->threshold = vifc->vifc_threshold;
761 v->bytes_in = 0;
762 v->bytes_out = 0;
763 v->pkt_in = 0;
764 v->pkt_out = 0;
765 v->link = dev->ifindex;
766 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
767 v->link = dev->iflink;
769 /* And finish update writing critical data */
770 write_lock_bh(&mrt_lock);
771 v->dev = dev;
772 #ifdef CONFIG_IP_PIMSM
773 if (v->flags&VIFF_REGISTER)
774 mrt->mroute_reg_vif_num = vifi;
775 #endif
776 if (vifi+1 > mrt->maxvif)
777 mrt->maxvif = vifi+1;
778 write_unlock_bh(&mrt_lock);
779 return 0;
782 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
783 __be32 origin,
784 __be32 mcastgrp)
786 int line = MFC_HASH(mcastgrp, origin);
787 struct mfc_cache *c;
789 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
790 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
791 return c;
793 return NULL;
797 * Allocate a multicast cache entry
799 static struct mfc_cache *ipmr_cache_alloc(void)
801 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
802 if (c == NULL)
803 return NULL;
804 c->mfc_un.res.minvif = MAXVIFS;
805 return c;
808 static struct mfc_cache *ipmr_cache_alloc_unres(void)
810 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
811 if (c == NULL)
812 return NULL;
813 skb_queue_head_init(&c->mfc_un.unres.unresolved);
814 c->mfc_un.unres.expires = jiffies + 10*HZ;
815 return c;
819 * A cache entry has gone into a resolved state from queued
822 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
823 struct mfc_cache *uc, struct mfc_cache *c)
825 struct sk_buff *skb;
826 struct nlmsgerr *e;
829 * Play the pending entries through our router
832 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
833 if (ip_hdr(skb)->version == 0) {
834 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
836 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
837 nlh->nlmsg_len = (skb_tail_pointer(skb) -
838 (u8 *)nlh);
839 } else {
840 nlh->nlmsg_type = NLMSG_ERROR;
841 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
842 skb_trim(skb, nlh->nlmsg_len);
843 e = NLMSG_DATA(nlh);
844 e->error = -EMSGSIZE;
845 memset(&e->msg, 0, sizeof(e->msg));
848 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
849 } else
850 ip_mr_forward(net, mrt, skb, c, 0);
855 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
856 * expects the following bizarre scheme.
858 * Called under mrt_lock.
861 static int ipmr_cache_report(struct mr_table *mrt,
862 struct sk_buff *pkt, vifi_t vifi, int assert)
864 struct sk_buff *skb;
865 const int ihl = ip_hdrlen(pkt);
866 struct igmphdr *igmp;
867 struct igmpmsg *msg;
868 int ret;
870 #ifdef CONFIG_IP_PIMSM
871 if (assert == IGMPMSG_WHOLEPKT)
872 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
873 else
874 #endif
875 skb = alloc_skb(128, GFP_ATOMIC);
877 if (!skb)
878 return -ENOBUFS;
880 #ifdef CONFIG_IP_PIMSM
881 if (assert == IGMPMSG_WHOLEPKT) {
882 /* Ugly, but we have no choice with this interface.
883 Duplicate old header, fix ihl, length etc.
884 And all this only to mangle msg->im_msgtype and
885 to set msg->im_mbz to "mbz" :-)
887 skb_push(skb, sizeof(struct iphdr));
888 skb_reset_network_header(skb);
889 skb_reset_transport_header(skb);
890 msg = (struct igmpmsg *)skb_network_header(skb);
891 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
892 msg->im_msgtype = IGMPMSG_WHOLEPKT;
893 msg->im_mbz = 0;
894 msg->im_vif = mrt->mroute_reg_vif_num;
895 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
896 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
897 sizeof(struct iphdr));
898 } else
899 #endif
903 * Copy the IP header
906 skb->network_header = skb->tail;
907 skb_put(skb, ihl);
908 skb_copy_to_linear_data(skb, pkt->data, ihl);
909 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
910 msg = (struct igmpmsg *)skb_network_header(skb);
911 msg->im_vif = vifi;
912 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
915 * Add our header
918 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
919 igmp->type =
920 msg->im_msgtype = assert;
921 igmp->code = 0;
922 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
923 skb->transport_header = skb->network_header;
926 if (mrt->mroute_sk == NULL) {
927 kfree_skb(skb);
928 return -EINVAL;
932 * Deliver to mrouted
934 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
935 if (ret < 0) {
936 if (net_ratelimit())
937 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
938 kfree_skb(skb);
941 return ret;
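/*
 * Userspace side of the upcall built above (a hedged sketch, not part
 * of this file): the daemon that issued MRT_INIT owns mrt->mroute_sk,
 * a raw IGMP socket, and reads struct igmpmsg frames from it.  An
 * upcall is told apart from a real IGMP packet by im_mbz (the protocol
 * byte of the overlaid IP header) being zero.  mrouter_fd is a
 * placeholder name; the structures and IGMPMSG_* constants come from
 * <linux/mroute.h>.
 *
 *	char buf[8192];
 *	ssize_t len = read(mrouter_fd, buf, sizeof(buf));
 *	struct igmpmsg *msg = (struct igmpmsg *)buf;
 *
 *	if (len >= (ssize_t)sizeof(*msg) && msg->im_mbz == 0) {
 *		switch (msg->im_msgtype) {
 *		case IGMPMSG_NOCACHE:	// resolve (im_src, im_dst), then MRT_ADD_MFC
 *			break;
 *		case IGMPMSG_WRONGVIF:	// PIM assert processing for im_vif
 *			break;
 *		case IGMPMSG_WHOLEPKT:	// PIM register: whole packet follows the header
 *			break;
 *		}
 *	}
 */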
 945  *     Queue a packet for resolution. It gets a locked cache entry!
948 static int
949 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
951 bool found = false;
952 int err;
953 struct mfc_cache *c;
954 const struct iphdr *iph = ip_hdr(skb);
956 spin_lock_bh(&mfc_unres_lock);
957 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
958 if (c->mfc_mcastgrp == iph->daddr &&
959 c->mfc_origin == iph->saddr) {
960 found = true;
961 break;
965 if (!found) {
967 * Create a new entry if allowable
970 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
971 (c = ipmr_cache_alloc_unres()) == NULL) {
972 spin_unlock_bh(&mfc_unres_lock);
974 kfree_skb(skb);
975 return -ENOBUFS;
979 * Fill in the new cache entry
981 c->mfc_parent = -1;
982 c->mfc_origin = iph->saddr;
983 c->mfc_mcastgrp = iph->daddr;
986 * Reflect first query at mrouted.
988 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
989 if (err < 0) {
990 /* If the report failed throw the cache entry
991 out - Brad Parker
993 spin_unlock_bh(&mfc_unres_lock);
995 ipmr_cache_free(c);
996 kfree_skb(skb);
997 return err;
1000 atomic_inc(&mrt->cache_resolve_queue_len);
1001 list_add(&c->list, &mrt->mfc_unres_queue);
1003 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1004 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1008 * See if we can append the packet
1010 if (c->mfc_un.unres.unresolved.qlen>3) {
1011 kfree_skb(skb);
1012 err = -ENOBUFS;
1013 } else {
1014 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1015 err = 0;
1018 spin_unlock_bh(&mfc_unres_lock);
1019 return err;
1023 * MFC cache manipulation by user space mroute daemon
1026 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1028 int line;
1029 struct mfc_cache *c, *next;
1031 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1033 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1034 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1035 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1036 write_lock_bh(&mrt_lock);
1037 list_del(&c->list);
1038 write_unlock_bh(&mrt_lock);
1040 ipmr_cache_free(c);
1041 return 0;
1044 return -ENOENT;
1047 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1048 struct mfcctl *mfc, int mrtsock)
1050 bool found = false;
1051 int line;
1052 struct mfc_cache *uc, *c;
1054 if (mfc->mfcc_parent >= MAXVIFS)
1055 return -ENFILE;
1057 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1059 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1060 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1061 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1062 found = true;
1063 break;
1067 if (found) {
1068 write_lock_bh(&mrt_lock);
1069 c->mfc_parent = mfc->mfcc_parent;
1070 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1071 if (!mrtsock)
1072 c->mfc_flags |= MFC_STATIC;
1073 write_unlock_bh(&mrt_lock);
1074 return 0;
1077 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1078 return -EINVAL;
1080 c = ipmr_cache_alloc();
1081 if (c == NULL)
1082 return -ENOMEM;
1084 c->mfc_origin = mfc->mfcc_origin.s_addr;
1085 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1086 c->mfc_parent = mfc->mfcc_parent;
1087 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1088 if (!mrtsock)
1089 c->mfc_flags |= MFC_STATIC;
1091 write_lock_bh(&mrt_lock);
1092 list_add(&c->list, &mrt->mfc_cache_array[line]);
1093 write_unlock_bh(&mrt_lock);
1096 * Check to see if we resolved a queued list. If so we
1097 * need to send on the frames and tidy up.
1099 found = false;
1100 spin_lock_bh(&mfc_unres_lock);
1101 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1102 if (uc->mfc_origin == c->mfc_origin &&
1103 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1104 list_del(&uc->list);
1105 atomic_dec(&mrt->cache_resolve_queue_len);
1106 found = true;
1107 break;
1110 if (list_empty(&mrt->mfc_unres_queue))
1111 del_timer(&mrt->ipmr_expire_timer);
1112 spin_unlock_bh(&mfc_unres_lock);
1114 if (found) {
1115 ipmr_cache_resolve(net, mrt, uc, c);
1116 ipmr_cache_free(uc);
1118 return 0;
1122 * Close the multicast socket, and clear the vif tables etc
1125 static void mroute_clean_tables(struct mr_table *mrt)
1127 int i;
1128 LIST_HEAD(list);
1129 struct mfc_cache *c, *next;
1132 * Shut down all active vif entries
1134 for (i = 0; i < mrt->maxvif; i++) {
1135 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1136 vif_delete(mrt, i, 0, &list);
1138 unregister_netdevice_many(&list);
1141 * Wipe the cache
1143 for (i = 0; i < MFC_LINES; i++) {
1144 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1145 if (c->mfc_flags&MFC_STATIC)
1146 continue;
1147 write_lock_bh(&mrt_lock);
1148 list_del(&c->list);
1149 write_unlock_bh(&mrt_lock);
1151 ipmr_cache_free(c);
1155 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1156 spin_lock_bh(&mfc_unres_lock);
1157 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1158 list_del(&c->list);
1159 ipmr_destroy_unres(mrt, c);
1161 spin_unlock_bh(&mfc_unres_lock);
1165 static void mrtsock_destruct(struct sock *sk)
1167 struct net *net = sock_net(sk);
1168 struct mr_table *mrt;
1170 rtnl_lock();
1171 ipmr_for_each_table(mrt, net) {
1172 if (sk == mrt->mroute_sk) {
1173 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1175 write_lock_bh(&mrt_lock);
1176 mrt->mroute_sk = NULL;
1177 write_unlock_bh(&mrt_lock);
1179 mroute_clean_tables(mrt);
1182 rtnl_unlock();
1186 * Socket options and virtual interface manipulation. The whole
1187 * virtual interface system is a complete heap, but unfortunately
1188 * that's how BSD mrouted happens to think. Maybe one day with a proper
1189 * MOSPF/PIM router set up we can clean this up.
1192 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1194 int ret;
1195 struct vifctl vif;
1196 struct mfcctl mfc;
1197 struct net *net = sock_net(sk);
1198 struct mr_table *mrt;
1200 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1201 if (mrt == NULL)
1202 return -ENOENT;
1204 if (optname != MRT_INIT) {
1205 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1206 return -EACCES;
1209 switch (optname) {
1210 case MRT_INIT:
1211 if (sk->sk_type != SOCK_RAW ||
1212 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1213 return -EOPNOTSUPP;
1214 if (optlen != sizeof(int))
1215 return -ENOPROTOOPT;
1217 rtnl_lock();
1218 if (mrt->mroute_sk) {
1219 rtnl_unlock();
1220 return -EADDRINUSE;
1223 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1224 if (ret == 0) {
1225 write_lock_bh(&mrt_lock);
1226 mrt->mroute_sk = sk;
1227 write_unlock_bh(&mrt_lock);
1229 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1231 rtnl_unlock();
1232 return ret;
1233 case MRT_DONE:
1234 if (sk != mrt->mroute_sk)
1235 return -EACCES;
1236 return ip_ra_control(sk, 0, NULL);
1237 case MRT_ADD_VIF:
1238 case MRT_DEL_VIF:
1239 if (optlen != sizeof(vif))
1240 return -EINVAL;
1241 if (copy_from_user(&vif, optval, sizeof(vif)))
1242 return -EFAULT;
1243 if (vif.vifc_vifi >= MAXVIFS)
1244 return -ENFILE;
1245 rtnl_lock();
1246 if (optname == MRT_ADD_VIF) {
1247 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1248 } else {
1249 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1251 rtnl_unlock();
1252 return ret;
1255 * Manipulate the forwarding caches. These live
1256 * in a sort of kernel/user symbiosis.
1258 case MRT_ADD_MFC:
1259 case MRT_DEL_MFC:
1260 if (optlen != sizeof(mfc))
1261 return -EINVAL;
1262 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1263 return -EFAULT;
1264 rtnl_lock();
1265 if (optname == MRT_DEL_MFC)
1266 ret = ipmr_mfc_delete(mrt, &mfc);
1267 else
1268 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1269 rtnl_unlock();
1270 return ret;
1272 * Control PIM assert.
1274 case MRT_ASSERT:
1276 int v;
1277 if (get_user(v,(int __user *)optval))
1278 return -EFAULT;
1279 mrt->mroute_do_assert = (v) ? 1 : 0;
1280 return 0;
1282 #ifdef CONFIG_IP_PIMSM
1283 case MRT_PIM:
1285 int v;
1287 if (get_user(v,(int __user *)optval))
1288 return -EFAULT;
1289 v = (v) ? 1 : 0;
1291 rtnl_lock();
1292 ret = 0;
1293 if (v != mrt->mroute_do_pim) {
1294 mrt->mroute_do_pim = v;
1295 mrt->mroute_do_assert = v;
1297 rtnl_unlock();
1298 return ret;
1300 #endif
1301 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1302 case MRT_TABLE:
1304 u32 v;
1306 if (optlen != sizeof(u32))
1307 return -EINVAL;
1308 if (get_user(v, (u32 __user *)optval))
1309 return -EFAULT;
1310 if (sk == mrt->mroute_sk)
1311 return -EBUSY;
1313 rtnl_lock();
1314 ret = 0;
1315 if (!ipmr_new_table(net, v))
1316 ret = -ENOMEM;
1317 raw_sk(sk)->ipmr_table = v;
1318 rtnl_unlock();
1319 return ret;
1321 #endif
1323 * Spurious command, or MRT_VERSION which you cannot
1324 * set.
1326 default:
1327 return -ENOPROTOOPT;
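/*
 * Daemon-side calls matching the options handled above, as a hedged
 * sketch (none of this is in the kernel; local_addr, src, grp and the
 * vif numbers are placeholders).  The option level is IPPROTO_IP, and
 * everything after MRT_INIT is normally issued on the same raw IGMP
 * socket, as enforced by the checks above.
 *
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = {
 *		.vifc_vifi            = 0,
 *		.vifc_threshold       = 1,
 *		.vifc_lcl_addr.s_addr = local_addr,
 *	};
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	struct mfcctl mc = {
 *		.mfcc_origin.s_addr   = src,
 *		.mfcc_mcastgrp.s_addr = grp,
 *		.mfcc_parent          = 0,
 *	};
 *	mc.mfcc_ttls[1] = 1;	// forward on vif 1
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_DONE, NULL, 0);	// on shutdown
 *
 * With CONFIG_IP_MROUTE_MULTIPLE_TABLES, MRT_TABLE may be set before
 * MRT_INIT to select a table other than RT_TABLE_DEFAULT.
 */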
1332 * Getsock opt support for the multicast routing system.
1335 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1337 int olr;
1338 int val;
1339 struct net *net = sock_net(sk);
1340 struct mr_table *mrt;
1342 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1343 if (mrt == NULL)
1344 return -ENOENT;
1346 if (optname != MRT_VERSION &&
1347 #ifdef CONFIG_IP_PIMSM
1348 optname!=MRT_PIM &&
1349 #endif
1350 optname!=MRT_ASSERT)
1351 return -ENOPROTOOPT;
1353 if (get_user(olr, optlen))
1354 return -EFAULT;
1356 olr = min_t(unsigned int, olr, sizeof(int));
1357 if (olr < 0)
1358 return -EINVAL;
1360 if (put_user(olr, optlen))
1361 return -EFAULT;
1362 if (optname == MRT_VERSION)
1363 val = 0x0305;
1364 #ifdef CONFIG_IP_PIMSM
1365 else if (optname == MRT_PIM)
1366 val = mrt->mroute_do_pim;
1367 #endif
1368 else
1369 val = mrt->mroute_do_assert;
1370 if (copy_to_user(optval, &val, olr))
1371 return -EFAULT;
1372 return 0;
1376 * The IP multicast ioctl support routines.
1379 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1381 struct sioc_sg_req sr;
1382 struct sioc_vif_req vr;
1383 struct vif_device *vif;
1384 struct mfc_cache *c;
1385 struct net *net = sock_net(sk);
1386 struct mr_table *mrt;
1388 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1389 if (mrt == NULL)
1390 return -ENOENT;
1392 switch (cmd) {
1393 case SIOCGETVIFCNT:
1394 if (copy_from_user(&vr, arg, sizeof(vr)))
1395 return -EFAULT;
1396 if (vr.vifi >= mrt->maxvif)
1397 return -EINVAL;
1398 read_lock(&mrt_lock);
1399 vif = &mrt->vif_table[vr.vifi];
1400 if (VIF_EXISTS(mrt, vr.vifi)) {
1401 vr.icount = vif->pkt_in;
1402 vr.ocount = vif->pkt_out;
1403 vr.ibytes = vif->bytes_in;
1404 vr.obytes = vif->bytes_out;
1405 read_unlock(&mrt_lock);
1407 if (copy_to_user(arg, &vr, sizeof(vr)))
1408 return -EFAULT;
1409 return 0;
1411 read_unlock(&mrt_lock);
1412 return -EADDRNOTAVAIL;
1413 case SIOCGETSGCNT:
1414 if (copy_from_user(&sr, arg, sizeof(sr)))
1415 return -EFAULT;
1417 read_lock(&mrt_lock);
1418 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1419 if (c) {
1420 sr.pktcnt = c->mfc_un.res.pkt;
1421 sr.bytecnt = c->mfc_un.res.bytes;
1422 sr.wrong_if = c->mfc_un.res.wrong_if;
1423 read_unlock(&mrt_lock);
1425 if (copy_to_user(arg, &sr, sizeof(sr)))
1426 return -EFAULT;
1427 return 0;
1429 read_unlock(&mrt_lock);
1430 return -EADDRNOTAVAIL;
1431 default:
1432 return -ENOIOCTLCMD;
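/*
 * Matching userspace ioctl usage, as a sketch under the same
 * assumptions as above (fd is the multicast routing socket; src and
 * grp are placeholders):
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *	if (ioctl(fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu pkts in, %lu pkts out\n",
 *		       vr.icount, vr.ocount);
 *
 *	struct sioc_sg_req sr = { .src.s_addr = src, .grp.s_addr = grp };
 *	if (ioctl(fd, SIOCGETSGCNT, &sr) == 0)
 *		printf("(S,G): %lu pkts, %lu bytes, %lu wrong-if\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 */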
1437 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1439 struct net_device *dev = ptr;
1440 struct net *net = dev_net(dev);
1441 struct mr_table *mrt;
1442 struct vif_device *v;
1443 int ct;
1444 LIST_HEAD(list);
1446 if (event != NETDEV_UNREGISTER)
1447 return NOTIFY_DONE;
1449 ipmr_for_each_table(mrt, net) {
1450 v = &mrt->vif_table[0];
1451 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1452 if (v->dev == dev)
1453 vif_delete(mrt, ct, 1, &list);
1456 unregister_netdevice_many(&list);
1457 return NOTIFY_DONE;
1461 static struct notifier_block ip_mr_notifier = {
1462 .notifier_call = ipmr_device_event,
1466 * Encapsulate a packet by attaching a valid IPIP header to it.
1467 * This avoids tunnel drivers and other mess and gives us the speed so
1468 * important for multicast video.
1471 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1473 struct iphdr *iph;
1474 struct iphdr *old_iph = ip_hdr(skb);
1476 skb_push(skb, sizeof(struct iphdr));
1477 skb->transport_header = skb->network_header;
1478 skb_reset_network_header(skb);
1479 iph = ip_hdr(skb);
1481 iph->version = 4;
1482 iph->tos = old_iph->tos;
1483 iph->ttl = old_iph->ttl;
1484 iph->frag_off = 0;
1485 iph->daddr = daddr;
1486 iph->saddr = saddr;
1487 iph->protocol = IPPROTO_IPIP;
1488 iph->ihl = 5;
1489 iph->tot_len = htons(skb->len);
1490 ip_select_ident(iph, skb_dst(skb), NULL);
1491 ip_send_check(iph);
1493 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1494 nf_reset(skb);
1497 static inline int ipmr_forward_finish(struct sk_buff *skb)
1499 struct ip_options * opt = &(IPCB(skb)->opt);
1501 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1503 if (unlikely(opt->optlen))
1504 ip_forward_options(skb);
1506 return dst_output(skb);
1510 * Processing handlers for ipmr_forward
1513 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1514 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1516 const struct iphdr *iph = ip_hdr(skb);
1517 struct vif_device *vif = &mrt->vif_table[vifi];
1518 struct net_device *dev;
1519 struct rtable *rt;
1520 int encap = 0;
1522 if (vif->dev == NULL)
1523 goto out_free;
1525 #ifdef CONFIG_IP_PIMSM
1526 if (vif->flags & VIFF_REGISTER) {
1527 vif->pkt_out++;
1528 vif->bytes_out += skb->len;
1529 vif->dev->stats.tx_bytes += skb->len;
1530 vif->dev->stats.tx_packets++;
1531 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1532 goto out_free;
1534 #endif
1536 if (vif->flags&VIFF_TUNNEL) {
1537 struct flowi fl = { .oif = vif->link,
1538 .nl_u = { .ip4_u =
1539 { .daddr = vif->remote,
1540 .saddr = vif->local,
1541 .tos = RT_TOS(iph->tos) } },
1542 .proto = IPPROTO_IPIP };
1543 if (ip_route_output_key(net, &rt, &fl))
1544 goto out_free;
1545 encap = sizeof(struct iphdr);
1546 } else {
1547 struct flowi fl = { .oif = vif->link,
1548 .nl_u = { .ip4_u =
1549 { .daddr = iph->daddr,
1550 .tos = RT_TOS(iph->tos) } },
1551 .proto = IPPROTO_IPIP };
1552 if (ip_route_output_key(net, &rt, &fl))
1553 goto out_free;
1556 dev = rt->u.dst.dev;
1558 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1559 		/* Do not fragment multicasts. Alas, IPv4 does not
1560 		   let us send ICMP here, so the packets simply disappear
1561 		   into a blackhole.
1564 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1565 ip_rt_put(rt);
1566 goto out_free;
1569 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1571 if (skb_cow(skb, encap)) {
1572 ip_rt_put(rt);
1573 goto out_free;
1576 vif->pkt_out++;
1577 vif->bytes_out += skb->len;
1579 skb_dst_drop(skb);
1580 skb_dst_set(skb, &rt->u.dst);
1581 ip_decrease_ttl(ip_hdr(skb));
1583 /* FIXME: forward and output firewalls used to be called here.
1584 * What do we do with netfilter? -- RR */
1585 if (vif->flags & VIFF_TUNNEL) {
1586 ip_encap(skb, vif->local, vif->remote);
1587 /* FIXME: extra output firewall step used to be here. --RR */
1588 vif->dev->stats.tx_packets++;
1589 vif->dev->stats.tx_bytes += skb->len;
1592 IPCB(skb)->flags |= IPSKB_FORWARDED;
1595 	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
1596 	 * not only before forwarding, but after forwarding on all output
1597 	 * interfaces. Clearly, if the mrouter runs a multicasting
1598 	 * program, it should receive packets regardless of the interface
1599 	 * the program joined on.
1600 	 * If we do not do this, the program will have to join on all
1601 	 * interfaces. On the other hand, a multihomed host (or router, but
1602 	 * not mrouter) cannot join more than one interface - it would
1603 	 * result in receiving multiple packets.
1605 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1606 ipmr_forward_finish);
1607 return;
1609 out_free:
1610 kfree_skb(skb);
1613 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1615 int ct;
1617 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1618 if (mrt->vif_table[ct].dev == dev)
1619 break;
1621 return ct;
1624 /* "local" means that we should preserve one skb (for local delivery) */
1626 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1627 struct sk_buff *skb, struct mfc_cache *cache,
1628 int local)
1630 int psend = -1;
1631 int vif, ct;
1633 vif = cache->mfc_parent;
1634 cache->mfc_un.res.pkt++;
1635 cache->mfc_un.res.bytes += skb->len;
1638 * Wrong interface: drop packet and (maybe) send PIM assert.
1640 if (mrt->vif_table[vif].dev != skb->dev) {
1641 int true_vifi;
1643 if (skb_rtable(skb)->fl.iif == 0) {
1644 /* It is our own packet, looped back.
1645 Very complicated situation...
1647 			   The best workaround until routing daemons are
1648 			   fixed is not to redistribute a packet if it was
1649 			   sent through the wrong interface. It means that
1650 			   multicast applications WILL NOT work for
1651 			   (S,G) entries whose default multicast route points
1652 			   to the wrong oif. In any case, it is not a good
1653 			   idea to run multicasting applications on a router.
1655 goto dont_forward;
1658 cache->mfc_un.res.wrong_if++;
1659 true_vifi = ipmr_find_vif(mrt, skb->dev);
1661 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1662 		    /* pimsm uses asserts when switching from RPT to SPT,
1663 		       so we cannot check that the packet arrived on an oif.
1664 		       It is bad, but otherwise we would need to move a pretty
1665 		       large chunk of pimd into the kernel. Ough... --ANK
1667 (mrt->mroute_do_pim ||
1668 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1669 time_after(jiffies,
1670 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1671 cache->mfc_un.res.last_assert = jiffies;
1672 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1674 goto dont_forward;
1677 mrt->vif_table[vif].pkt_in++;
1678 mrt->vif_table[vif].bytes_in += skb->len;
1681 * Forward the frame
1683 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1684 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1685 if (psend != -1) {
1686 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1687 if (skb2)
1688 ipmr_queue_xmit(net, mrt, skb2, cache,
1689 psend);
1691 psend = ct;
1694 if (psend != -1) {
1695 if (local) {
1696 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1697 if (skb2)
1698 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1699 } else {
1700 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1701 return 0;
1705 dont_forward:
1706 if (!local)
1707 kfree_skb(skb);
1708 return 0;
1713 * Multicast packets for forwarding arrive here
1716 int ip_mr_input(struct sk_buff *skb)
1718 struct mfc_cache *cache;
1719 struct net *net = dev_net(skb->dev);
1720 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1721 struct mr_table *mrt;
1722 int err;
1724 	/* A packet looped back after forwarding should not be
1725 	   forwarded a second time, but it can still be delivered locally.
1727 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1728 goto dont_forward;
1730 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1731 if (err < 0)
1732 return err;
1734 if (!local) {
1735 if (IPCB(skb)->opt.router_alert) {
1736 if (ip_call_ra_chain(skb))
1737 return 0;
1738 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1739 			/* IGMPv1 (and broken IGMPv2 implementations such as
1740 Cisco IOS <= 11.2(8)) do not put router alert
1741 option to IGMP packets destined to routable
1742 groups. It is very bad, because it means
1743 that we can forward NO IGMP messages.
1745 read_lock(&mrt_lock);
1746 if (mrt->mroute_sk) {
1747 nf_reset(skb);
1748 raw_rcv(mrt->mroute_sk, skb);
1749 read_unlock(&mrt_lock);
1750 return 0;
1752 read_unlock(&mrt_lock);
1756 read_lock(&mrt_lock);
1757 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1760 * No usable cache entry
1762 if (cache == NULL) {
1763 int vif;
1765 if (local) {
1766 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1767 ip_local_deliver(skb);
1768 if (skb2 == NULL) {
1769 read_unlock(&mrt_lock);
1770 return -ENOBUFS;
1772 skb = skb2;
1775 vif = ipmr_find_vif(mrt, skb->dev);
1776 if (vif >= 0) {
1777 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1778 read_unlock(&mrt_lock);
1780 return err2;
1782 read_unlock(&mrt_lock);
1783 kfree_skb(skb);
1784 return -ENODEV;
1787 ip_mr_forward(net, mrt, skb, cache, local);
1789 read_unlock(&mrt_lock);
1791 if (local)
1792 return ip_local_deliver(skb);
1794 return 0;
1796 dont_forward:
1797 if (local)
1798 return ip_local_deliver(skb);
1799 kfree_skb(skb);
1800 return 0;
1803 #ifdef CONFIG_IP_PIMSM
1804 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1805 unsigned int pimlen)
1807 struct net_device *reg_dev = NULL;
1808 struct iphdr *encap;
1810 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1812 Check that:
1813 	   a. packet is really destined to a multicast group
1814 b. packet is not a NULL-REGISTER
1815 c. packet is not truncated
1817 if (!ipv4_is_multicast(encap->daddr) ||
1818 encap->tot_len == 0 ||
1819 ntohs(encap->tot_len) + pimlen > skb->len)
1820 return 1;
1822 read_lock(&mrt_lock);
1823 if (mrt->mroute_reg_vif_num >= 0)
1824 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1825 if (reg_dev)
1826 dev_hold(reg_dev);
1827 read_unlock(&mrt_lock);
1829 if (reg_dev == NULL)
1830 return 1;
1832 skb->mac_header = skb->network_header;
1833 skb_pull(skb, (u8*)encap - skb->data);
1834 skb_reset_network_header(skb);
1835 skb->protocol = htons(ETH_P_IP);
1836 skb->ip_summed = 0;
1837 skb->pkt_type = PACKET_HOST;
1839 skb_tunnel_rx(skb, reg_dev);
1841 netif_rx(skb);
1842 dev_put(reg_dev);
1844 return 0;
1846 #endif
1848 #ifdef CONFIG_IP_PIMSM_V1
1850 * Handle IGMP messages of PIMv1
1853 int pim_rcv_v1(struct sk_buff * skb)
1855 struct igmphdr *pim;
1856 struct net *net = dev_net(skb->dev);
1857 struct mr_table *mrt;
1859 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1860 goto drop;
1862 pim = igmp_hdr(skb);
1864 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1865 goto drop;
1867 if (!mrt->mroute_do_pim ||
1868 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1869 goto drop;
1871 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1872 drop:
1873 kfree_skb(skb);
1875 return 0;
1877 #endif
1879 #ifdef CONFIG_IP_PIMSM_V2
1880 static int pim_rcv(struct sk_buff * skb)
1882 struct pimreghdr *pim;
1883 struct net *net = dev_net(skb->dev);
1884 struct mr_table *mrt;
1886 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1887 goto drop;
1889 pim = (struct pimreghdr *)skb_transport_header(skb);
1890 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1891 (pim->flags&PIM_NULL_REGISTER) ||
1892 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1893 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1894 goto drop;
1896 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1897 goto drop;
1899 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1900 drop:
1901 kfree_skb(skb);
1903 return 0;
1905 #endif
1907 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1908 struct mfc_cache *c, struct rtmsg *rtm)
1910 int ct;
1911 struct rtnexthop *nhp;
1912 u8 *b = skb_tail_pointer(skb);
1913 struct rtattr *mp_head;
1915 /* If cache is unresolved, don't try to parse IIF and OIF */
1916 if (c->mfc_parent >= MAXVIFS)
1917 return -ENOENT;
1919 if (VIF_EXISTS(mrt, c->mfc_parent))
1920 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1922 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1924 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1925 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1926 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1927 goto rtattr_failure;
1928 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1929 nhp->rtnh_flags = 0;
1930 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1931 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1932 nhp->rtnh_len = sizeof(*nhp);
1935 mp_head->rta_type = RTA_MULTIPATH;
1936 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1937 rtm->rtm_type = RTN_MULTICAST;
1938 return 1;
1940 rtattr_failure:
1941 nlmsg_trim(skb, b);
1942 return -EMSGSIZE;
1945 int ipmr_get_route(struct net *net,
1946 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1948 int err;
1949 struct mr_table *mrt;
1950 struct mfc_cache *cache;
1951 struct rtable *rt = skb_rtable(skb);
1953 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1954 if (mrt == NULL)
1955 return -ENOENT;
1957 read_lock(&mrt_lock);
1958 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1960 if (cache == NULL) {
1961 struct sk_buff *skb2;
1962 struct iphdr *iph;
1963 struct net_device *dev;
1964 int vif;
1966 if (nowait) {
1967 read_unlock(&mrt_lock);
1968 return -EAGAIN;
1971 dev = skb->dev;
1972 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1973 read_unlock(&mrt_lock);
1974 return -ENODEV;
1976 skb2 = skb_clone(skb, GFP_ATOMIC);
1977 if (!skb2) {
1978 read_unlock(&mrt_lock);
1979 return -ENOMEM;
1982 skb_push(skb2, sizeof(struct iphdr));
1983 skb_reset_network_header(skb2);
1984 iph = ip_hdr(skb2);
1985 iph->ihl = sizeof(struct iphdr) >> 2;
1986 iph->saddr = rt->rt_src;
1987 iph->daddr = rt->rt_dst;
1988 iph->version = 0;
1989 err = ipmr_cache_unresolved(mrt, vif, skb2);
1990 read_unlock(&mrt_lock);
1991 return err;
1994 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1995 cache->mfc_flags |= MFC_NOTIFY;
1996 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1997 read_unlock(&mrt_lock);
1998 return err;
2001 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2002 u32 pid, u32 seq, struct mfc_cache *c)
2004 struct nlmsghdr *nlh;
2005 struct rtmsg *rtm;
2007 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2008 if (nlh == NULL)
2009 return -EMSGSIZE;
2011 rtm = nlmsg_data(nlh);
2012 rtm->rtm_family = RTNL_FAMILY_IPMR;
2013 rtm->rtm_dst_len = 32;
2014 rtm->rtm_src_len = 32;
2015 rtm->rtm_tos = 0;
2016 rtm->rtm_table = mrt->id;
2017 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2018 rtm->rtm_type = RTN_MULTICAST;
2019 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2020 rtm->rtm_protocol = RTPROT_UNSPEC;
2021 rtm->rtm_flags = 0;
2023 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2024 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2026 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2027 goto nla_put_failure;
2029 return nlmsg_end(skb, nlh);
2031 nla_put_failure:
2032 nlmsg_cancel(skb, nlh);
2033 return -EMSGSIZE;
2036 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2038 struct net *net = sock_net(skb->sk);
2039 struct mr_table *mrt;
2040 struct mfc_cache *mfc;
2041 unsigned int t = 0, s_t;
2042 unsigned int h = 0, s_h;
2043 unsigned int e = 0, s_e;
2045 s_t = cb->args[0];
2046 s_h = cb->args[1];
2047 s_e = cb->args[2];
2049 read_lock(&mrt_lock);
2050 ipmr_for_each_table(mrt, net) {
2051 if (t < s_t)
2052 goto next_table;
2053 if (t > s_t)
2054 s_h = 0;
2055 for (h = s_h; h < MFC_LINES; h++) {
2056 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2057 if (e < s_e)
2058 goto next_entry;
2059 if (ipmr_fill_mroute(mrt, skb,
2060 NETLINK_CB(cb->skb).pid,
2061 cb->nlh->nlmsg_seq,
2062 mfc) < 0)
2063 goto done;
2064 next_entry:
2065 e++;
2067 e = s_e = 0;
2069 s_h = 0;
2070 next_table:
2071 t++;
2073 done:
2074 read_unlock(&mrt_lock);
2076 cb->args[2] = e;
2077 cb->args[1] = h;
2078 cb->args[0] = t;
2080 return skb->len;
2083 #ifdef CONFIG_PROC_FS
2085 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
2087 struct ipmr_vif_iter {
2088 struct seq_net_private p;
2089 struct mr_table *mrt;
2090 int ct;
2093 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2094 struct ipmr_vif_iter *iter,
2095 loff_t pos)
2097 struct mr_table *mrt = iter->mrt;
2099 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2100 if (!VIF_EXISTS(mrt, iter->ct))
2101 continue;
2102 if (pos-- == 0)
2103 return &mrt->vif_table[iter->ct];
2105 return NULL;
2108 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2109 __acquires(mrt_lock)
2111 struct ipmr_vif_iter *iter = seq->private;
2112 struct net *net = seq_file_net(seq);
2113 struct mr_table *mrt;
2115 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2116 if (mrt == NULL)
2117 return ERR_PTR(-ENOENT);
2119 iter->mrt = mrt;
2121 read_lock(&mrt_lock);
2122 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2123 : SEQ_START_TOKEN;
2126 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2128 struct ipmr_vif_iter *iter = seq->private;
2129 struct net *net = seq_file_net(seq);
2130 struct mr_table *mrt = iter->mrt;
2132 ++*pos;
2133 if (v == SEQ_START_TOKEN)
2134 return ipmr_vif_seq_idx(net, iter, 0);
2136 while (++iter->ct < mrt->maxvif) {
2137 if (!VIF_EXISTS(mrt, iter->ct))
2138 continue;
2139 return &mrt->vif_table[iter->ct];
2141 return NULL;
2144 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2145 __releases(mrt_lock)
2147 read_unlock(&mrt_lock);
2150 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2152 struct ipmr_vif_iter *iter = seq->private;
2153 struct mr_table *mrt = iter->mrt;
2155 if (v == SEQ_START_TOKEN) {
2156 seq_puts(seq,
2157 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
2158 } else {
2159 const struct vif_device *vif = v;
2160 const char *name = vif->dev ? vif->dev->name : "none";
2162 seq_printf(seq,
2163 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
2164 vif - mrt->vif_table,
2165 name, vif->bytes_in, vif->pkt_in,
2166 vif->bytes_out, vif->pkt_out,
2167 vif->flags, vif->local, vif->remote);
2169 return 0;
2172 static const struct seq_operations ipmr_vif_seq_ops = {
2173 .start = ipmr_vif_seq_start,
2174 .next = ipmr_vif_seq_next,
2175 .stop = ipmr_vif_seq_stop,
2176 .show = ipmr_vif_seq_show,
2179 static int ipmr_vif_open(struct inode *inode, struct file *file)
2181 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2182 sizeof(struct ipmr_vif_iter));
2185 static const struct file_operations ipmr_vif_fops = {
2186 .owner = THIS_MODULE,
2187 .open = ipmr_vif_open,
2188 .read = seq_read,
2189 .llseek = seq_lseek,
2190 .release = seq_release_net,
2193 struct ipmr_mfc_iter {
2194 struct seq_net_private p;
2195 struct mr_table *mrt;
2196 struct list_head *cache;
2197 int ct;
2201 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2202 struct ipmr_mfc_iter *it, loff_t pos)
2204 struct mr_table *mrt = it->mrt;
2205 struct mfc_cache *mfc;
2207 read_lock(&mrt_lock);
2208 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2209 it->cache = &mrt->mfc_cache_array[it->ct];
2210 list_for_each_entry(mfc, it->cache, list)
2211 if (pos-- == 0)
2212 return mfc;
2214 read_unlock(&mrt_lock);
2216 spin_lock_bh(&mfc_unres_lock);
2217 it->cache = &mrt->mfc_unres_queue;
2218 list_for_each_entry(mfc, it->cache, list)
2219 if (pos-- == 0)
2220 return mfc;
2221 spin_unlock_bh(&mfc_unres_lock);
2223 it->cache = NULL;
2224 return NULL;
2228 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2230 struct ipmr_mfc_iter *it = seq->private;
2231 struct net *net = seq_file_net(seq);
2232 struct mr_table *mrt;
2234 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2235 if (mrt == NULL)
2236 return ERR_PTR(-ENOENT);
2238 it->mrt = mrt;
2239 it->cache = NULL;
2240 it->ct = 0;
2241 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2242 : SEQ_START_TOKEN;
2245 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2247 struct mfc_cache *mfc = v;
2248 struct ipmr_mfc_iter *it = seq->private;
2249 struct net *net = seq_file_net(seq);
2250 struct mr_table *mrt = it->mrt;
2252 ++*pos;
2254 if (v == SEQ_START_TOKEN)
2255 return ipmr_mfc_seq_idx(net, seq->private, 0);
2257 if (mfc->list.next != it->cache)
2258 return list_entry(mfc->list.next, struct mfc_cache, list);
2260 if (it->cache == &mrt->mfc_unres_queue)
2261 goto end_of_list;
2263 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
2265 while (++it->ct < MFC_LINES) {
2266 it->cache = &mrt->mfc_cache_array[it->ct];
2267 if (list_empty(it->cache))
2268 continue;
2269 return list_first_entry(it->cache, struct mfc_cache, list);
2272 /* exhausted cache_array, show unresolved */
2273 read_unlock(&mrt_lock);
2274 it->cache = &mrt->mfc_unres_queue;
2275 it->ct = 0;
2277 spin_lock_bh(&mfc_unres_lock);
2278 if (!list_empty(it->cache))
2279 return list_first_entry(it->cache, struct mfc_cache, list);
2281 end_of_list:
2282 spin_unlock_bh(&mfc_unres_lock);
2283 it->cache = NULL;
2285 return NULL;
2288 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2290 struct ipmr_mfc_iter *it = seq->private;
2291 struct mr_table *mrt = it->mrt;
2293 if (it->cache == &mrt->mfc_unres_queue)
2294 spin_unlock_bh(&mfc_unres_lock);
2295 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2296 read_unlock(&mrt_lock);
2299 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2301 int n;
2303 if (v == SEQ_START_TOKEN) {
2304 seq_puts(seq,
2305 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
2306 } else {
2307 const struct mfc_cache *mfc = v;
2308 const struct ipmr_mfc_iter *it = seq->private;
2309 const struct mr_table *mrt = it->mrt;
2311 seq_printf(seq, "%08X %08X %-3hd",
2312 (__force u32) mfc->mfc_mcastgrp,
2313 (__force u32) mfc->mfc_origin,
2314 mfc->mfc_parent);
2316 if (it->cache != &mrt->mfc_unres_queue) {
2317 seq_printf(seq, " %8lu %8lu %8lu",
2318 mfc->mfc_un.res.pkt,
2319 mfc->mfc_un.res.bytes,
2320 mfc->mfc_un.res.wrong_if);
2321 for (n = mfc->mfc_un.res.minvif;
2322 n < mfc->mfc_un.res.maxvif; n++ ) {
2323 if (VIF_EXISTS(mrt, n) &&
2324 mfc->mfc_un.res.ttls[n] < 255)
2325 seq_printf(seq,
2326 " %2d:%-3d",
2327 n, mfc->mfc_un.res.ttls[n]);
2329 } else {
2330 /* unresolved mfc_caches don't contain
2331 * pkt, bytes and wrong_if values
2333 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2335 seq_putc(seq, '\n');
2337 return 0;
2340 static const struct seq_operations ipmr_mfc_seq_ops = {
2341 .start = ipmr_mfc_seq_start,
2342 .next = ipmr_mfc_seq_next,
2343 .stop = ipmr_mfc_seq_stop,
2344 .show = ipmr_mfc_seq_show,
2347 static int ipmr_mfc_open(struct inode *inode, struct file *file)
2349 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2350 sizeof(struct ipmr_mfc_iter));
2353 static const struct file_operations ipmr_mfc_fops = {
2354 .owner = THIS_MODULE,
2355 .open = ipmr_mfc_open,
2356 .read = seq_read,
2357 .llseek = seq_lseek,
2358 .release = seq_release_net,
2360 #endif
2362 #ifdef CONFIG_IP_PIMSM_V2
2363 static const struct net_protocol pim_protocol = {
2364 .handler = pim_rcv,
2365 .netns_ok = 1,
2367 #endif
2371 * Setup for IP multicast routing
2373 static int __net_init ipmr_net_init(struct net *net)
2375 int err;
2377 err = ipmr_rules_init(net);
2378 if (err < 0)
2379 goto fail;
2381 #ifdef CONFIG_PROC_FS
2382 err = -ENOMEM;
2383 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2384 goto proc_vif_fail;
2385 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2386 goto proc_cache_fail;
2387 #endif
2388 return 0;
2390 #ifdef CONFIG_PROC_FS
2391 proc_cache_fail:
2392 proc_net_remove(net, "ip_mr_vif");
2393 proc_vif_fail:
2394 ipmr_rules_exit(net);
2395 #endif
2396 fail:
2397 return err;
2400 static void __net_exit ipmr_net_exit(struct net *net)
2402 #ifdef CONFIG_PROC_FS
2403 proc_net_remove(net, "ip_mr_cache");
2404 proc_net_remove(net, "ip_mr_vif");
2405 #endif
2406 ipmr_rules_exit(net);
2409 static struct pernet_operations ipmr_net_ops = {
2410 .init = ipmr_net_init,
2411 .exit = ipmr_net_exit,
2414 int __init ip_mr_init(void)
2416 int err;
2418 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2419 sizeof(struct mfc_cache),
2420 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2421 NULL);
2422 if (!mrt_cachep)
2423 return -ENOMEM;
2425 err = register_pernet_subsys(&ipmr_net_ops);
2426 if (err)
2427 goto reg_pernet_fail;
2429 err = register_netdevice_notifier(&ip_mr_notifier);
2430 if (err)
2431 goto reg_notif_fail;
2432 #ifdef CONFIG_IP_PIMSM_V2
2433 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2434 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2435 err = -EAGAIN;
2436 goto add_proto_fail;
2438 #endif
2439 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2440 return 0;
2442 #ifdef CONFIG_IP_PIMSM_V2
2443 add_proto_fail:
2444 unregister_netdevice_notifier(&ip_mr_notifier);
2445 #endif
2446 reg_notif_fail:
2447 unregister_pernet_subsys(&ipmr_net_ops);
2448 reg_pernet_fail:
2449 kmem_cache_destroy(mrt_cachep);
2450 return err;