/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

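/* Locking discipline, as used throughout this file: the packet path
 * only ever takes read_lock(&mrt_lock); control-path updates
 * (vif_add/vif_delete, ipmr_mfc_add/ipmr_mfc_delete) take
 * write_lock_bh(). The unresolved queue is always walked under
 * mfc_unres_lock, from both contexts.
 */
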
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

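/* Note on the get_fs()/set_fs(KERNEL_DS) dance above and below: the
 * tunnel driver's ndo_do_ioctl() expects a user-space pointer inside
 * ifr, so the address limit is widened while we hand it a kernel
 * buffer (hence the __force void __user * cast on &p).
 */
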
static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

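/* The pimreg MTU set above leaves room for the outer IP header plus
 * an 8-byte PIM register header on re-encapsulated registers; hence
 * ETH_DATA_LEN - sizeof(struct iphdr) - 8.
 */
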
static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;
	struct net *net = mfc_net(c);

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

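/* The trylock above keeps the timer from spinning against a
 * process-context holder of mfc_unres_lock; on contention the scan is
 * simply retried HZ/10 later. The timer is re-armed with the shortest
 * remaining expiry among the surviving entries.
 */
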
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc_net(cache);

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

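/* TTL semantics for the array filled in above: a value of 0 or 255 in
 * mfcc_ttls[] means "never forward on this vif"; anything in between
 * is a threshold the packet's TTL must exceed in ip_mr_forward().
 * minvif/maxvif simply bound that scan.
 */
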
static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;
	case 0:
		dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

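/* The two allocators above differ only in context: resolved entries
 * are created from process context (mrouted's setsockopt) and may
 * sleep, hence GFP_KERNEL; unresolved entries are created on the
 * packet path, hence GFP_ATOMIC and the pre-armed 10 second expiry.
 */
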
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (net_eq(mfc_net(c), net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

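/* Bounds enforced above: at most ten unresolved entries may exist at
 * a time, and each holds at most four queued skbs; anything beyond
 * that is dropped rather than queued while we wait on mrouted.
 */
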
/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0; i<MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

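/* A minimal sketch of the daemon-side sequence the checks above imply
 * (illustrative only, not taken from mrouted; error handling omitted):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	struct vifctl vc = { .vifc_vifi = 0, ... };
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *	struct mfcctl mc = { .mfcc_origin = ..., .mfcc_mcastgrp = ... };
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 * The same raw socket then receives the IGMPMSG_* upcalls built by
 * ipmr_cache_report().
 */
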
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

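/* Clean up any vifs that reference a disappearing device. notify=1
 * below tells vif_delete() not to call unregister_netdevice() itself,
 * since the device is already on its way out.
 */
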
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;

	if (!net_eq(dev_net(dev), net))
		return NOTIFY_DONE;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct net *net = mfc_net(c);
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending on what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

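/* Forwarding below clones the skb for every target vif except the
 * last one, which consumes the original; "psend" trails the loop by
 * one entry to make that possible.
 */
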
/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   sent through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}

/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations sort of
			   Cisco IOS <= 11.2(8)) do not put router alert
			   option to IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (net->ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(net->ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

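/* The iph->version = 0 written above is the marker the resolver keys
 * on: skbs queued via ipmr_cache_unresolved() with version 0 are
 * netlink route requests, not real packets, and get answered (or
 * errored) rather than forwarded when the entry resolves or expires.
 */
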
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

{
1783 struct seq_net_private p
;
1784 struct mfc_cache
**cache
;
1789 static struct mfc_cache
*ipmr_mfc_seq_idx(struct net
*net
,
1790 struct ipmr_mfc_iter
*it
, loff_t pos
)
1792 struct mfc_cache
*mfc
;
1794 it
->cache
= net
->ipv4
.mfc_cache_array
;
1795 read_lock(&mrt_lock
);
1796 for (it
->ct
= 0; it
->ct
< MFC_LINES
; it
->ct
++)
1797 for (mfc
= net
->ipv4
.mfc_cache_array
[it
->ct
];
1798 mfc
; mfc
= mfc
->next
)
1801 read_unlock(&mrt_lock
);
1803 it
->cache
= &mfc_unres_queue
;
1804 spin_lock_bh(&mfc_unres_lock
);
1805 for (mfc
= mfc_unres_queue
; mfc
; mfc
= mfc
->next
)
1806 if (net_eq(mfc_net(mfc
), net
) &&
1809 spin_unlock_bh(&mfc_unres_lock
);
1816 static void *ipmr_mfc_seq_start(struct seq_file
*seq
, loff_t
*pos
)
1818 struct ipmr_mfc_iter
*it
= seq
->private;
1819 struct net
*net
= seq_file_net(seq
);
1823 return *pos
? ipmr_mfc_seq_idx(net
, seq
->private, *pos
- 1)
1827 static void *ipmr_mfc_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
1829 struct mfc_cache
*mfc
= v
;
1830 struct ipmr_mfc_iter
*it
= seq
->private;
1831 struct net
*net
= seq_file_net(seq
);
1835 if (v
== SEQ_START_TOKEN
)
1836 return ipmr_mfc_seq_idx(net
, seq
->private, 0);
1841 if (it
->cache
== &mfc_unres_queue
)
1844 BUG_ON(it
->cache
!= net
->ipv4
.mfc_cache_array
);
1846 while (++it
->ct
< MFC_LINES
) {
1847 mfc
= net
->ipv4
.mfc_cache_array
[it
->ct
];
1852 /* exhausted cache_array, show unresolved */
1853 read_unlock(&mrt_lock
);
1854 it
->cache
= &mfc_unres_queue
;
1857 spin_lock_bh(&mfc_unres_lock
);
1858 mfc
= mfc_unres_queue
;
1859 while (mfc
&& !net_eq(mfc_net(mfc
), net
))
1865 spin_unlock_bh(&mfc_unres_lock
);
1871 static void ipmr_mfc_seq_stop(struct seq_file
*seq
, void *v
)
1873 struct ipmr_mfc_iter
*it
= seq
->private;
1874 struct net
*net
= seq_file_net(seq
);
1876 if (it
->cache
== &mfc_unres_queue
)
1877 spin_unlock_bh(&mfc_unres_lock
);
1878 else if (it
->cache
== net
->ipv4
.mfc_cache_array
)
1879 read_unlock(&mrt_lock
);
1882 static int ipmr_mfc_seq_show(struct seq_file
*seq
, void *v
)
1885 struct net
*net
= seq_file_net(seq
);
1887 if (v
== SEQ_START_TOKEN
) {
1889 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1891 const struct mfc_cache
*mfc
= v
;
1892 const struct ipmr_mfc_iter
*it
= seq
->private;
1894 seq_printf(seq
, "%08lX %08lX %-3hd",
1895 (unsigned long) mfc
->mfc_mcastgrp
,
1896 (unsigned long) mfc
->mfc_origin
,
1899 if (it
->cache
!= &mfc_unres_queue
) {
1900 seq_printf(seq
, " %8lu %8lu %8lu",
1901 mfc
->mfc_un
.res
.pkt
,
1902 mfc
->mfc_un
.res
.bytes
,
1903 mfc
->mfc_un
.res
.wrong_if
);
1904 for (n
= mfc
->mfc_un
.res
.minvif
;
1905 n
< mfc
->mfc_un
.res
.maxvif
; n
++ ) {
1906 if (VIF_EXISTS(net
, n
) &&
1907 mfc
->mfc_un
.res
.ttls
[n
] < 255)
1910 n
, mfc
->mfc_un
.res
.ttls
[n
]);
1913 /* unresolved mfc_caches don't contain
1914 * pkt, bytes and wrong_if values
1916 seq_printf(seq
, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1918 seq_putc(seq
, '\n');
1923 static const struct seq_operations ipmr_mfc_seq_ops
= {
1924 .start
= ipmr_mfc_seq_start
,
1925 .next
= ipmr_mfc_seq_next
,
1926 .stop
= ipmr_mfc_seq_stop
,
1927 .show
= ipmr_mfc_seq_show
,
1930 static int ipmr_mfc_open(struct inode
*inode
, struct file
*file
)
1932 return seq_open_net(inode
, file
, &ipmr_mfc_seq_ops
,
1933 sizeof(struct ipmr_mfc_iter
));
1936 static const struct file_operations ipmr_mfc_fops
= {
1937 .owner
= THIS_MODULE
,
1938 .open
= ipmr_mfc_open
,
1940 .llseek
= seq_lseek
,
1941 .release
= seq_release_net
,
#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif

/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}