2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
75 static int ip6_rt_max_size
= 4096;
76 static int ip6_rt_gc_min_interval
= HZ
/ 2;
77 static int ip6_rt_gc_timeout
= 60*HZ
;
78 int ip6_rt_gc_interval
= 30*HZ
;
79 static int ip6_rt_gc_elasticity
= 9;
80 static int ip6_rt_mtu_expires
= 10*60*HZ
;
81 static int ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
83 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
);
84 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
85 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
86 static void ip6_dst_destroy(struct dst_entry
*);
87 static void ip6_dst_ifdown(struct dst_entry
*,
88 struct net_device
*dev
, int how
);
89 static int ip6_dst_gc(void);
91 static int ip6_pkt_discard(struct sk_buff
*skb
);
92 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
93 static void ip6_link_failure(struct sk_buff
*skb
);
94 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
96 static struct dst_ops ip6_dst_ops
= {
98 .protocol
= __constant_htons(ETH_P_IPV6
),
101 .check
= ip6_dst_check
,
102 .destroy
= ip6_dst_destroy
,
103 .ifdown
= ip6_dst_ifdown
,
104 .negative_advice
= ip6_negative_advice
,
105 .link_failure
= ip6_link_failure
,
106 .update_pmtu
= ip6_rt_update_pmtu
,
107 .entry_size
= sizeof(struct rt6_info
),
110 struct rt6_info ip6_null_entry
= {
113 .__refcnt
= ATOMIC_INIT(1),
115 .dev
= &loopback_dev
,
117 .error
= -ENETUNREACH
,
118 .metrics
= { [RTAX_HOPLIMIT
- 1] = 255, },
119 .input
= ip6_pkt_discard
,
120 .output
= ip6_pkt_discard_out
,
122 .path
= (struct dst_entry
*)&ip6_null_entry
,
125 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
126 .rt6i_metric
= ~(u32
) 0,
127 .rt6i_ref
= ATOMIC_INIT(1),
130 struct fib6_node ip6_routing_table
= {
131 .leaf
= &ip6_null_entry
,
132 .fn_flags
= RTN_ROOT
| RTN_TL_ROOT
| RTN_RTINFO
,
135 /* Protects all the ip6 fib */
137 DEFINE_RWLOCK(rt6_lock
);
140 /* allocate dst with ip6_dst_ops */
141 static __inline__
struct rt6_info
*ip6_dst_alloc(void)
143 return (struct rt6_info
*)dst_alloc(&ip6_dst_ops
);
146 static void ip6_dst_destroy(struct dst_entry
*dst
)
148 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
149 struct inet6_dev
*idev
= rt
->rt6i_idev
;
152 rt
->rt6i_idev
= NULL
;
157 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
160 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
161 struct inet6_dev
*idev
= rt
->rt6i_idev
;
163 if (dev
!= &loopback_dev
&& idev
!= NULL
&& idev
->dev
== dev
) {
164 struct inet6_dev
*loopback_idev
= in6_dev_get(&loopback_dev
);
165 if (loopback_idev
!= NULL
) {
166 rt
->rt6i_idev
= loopback_idev
;
172 static __inline__
int rt6_check_expired(const struct rt6_info
*rt
)
174 return (rt
->rt6i_flags
& RTF_EXPIRES
&&
175 time_after(jiffies
, rt
->rt6i_expires
));
179 * Route lookup. Any rt6_lock is implied.
182 static __inline__
struct rt6_info
*rt6_device_match(struct rt6_info
*rt
,
186 struct rt6_info
*local
= NULL
;
187 struct rt6_info
*sprt
;
190 for (sprt
= rt
; sprt
; sprt
= sprt
->u
.next
) {
191 struct net_device
*dev
= sprt
->rt6i_dev
;
192 if (dev
->ifindex
== oif
)
194 if (dev
->flags
& IFF_LOOPBACK
) {
195 if (sprt
->rt6i_idev
== NULL
||
196 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
199 if (local
&& (!oif
||
200 local
->rt6i_idev
->dev
->ifindex
== oif
))
211 return &ip6_null_entry
;
217 * pointer to the last default router chosen. BH is disabled locally.
219 static struct rt6_info
*rt6_dflt_pointer
;
220 static DEFINE_SPINLOCK(rt6_dflt_lock
);
222 void rt6_reset_dflt_pointer(struct rt6_info
*rt
)
224 spin_lock_bh(&rt6_dflt_lock
);
225 if (rt
== NULL
|| rt
== rt6_dflt_pointer
) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer
);
227 rt6_dflt_pointer
= NULL
;
229 spin_unlock_bh(&rt6_dflt_lock
);
232 /* Default Router Selection (RFC 2461 6.3.6) */
233 static struct rt6_info
*rt6_best_dflt(struct rt6_info
*rt
, int oif
)
235 struct rt6_info
*match
= NULL
;
236 struct rt6_info
*sprt
;
239 for (sprt
= rt
; sprt
; sprt
= sprt
->u
.next
) {
240 struct neighbour
*neigh
;
245 sprt
->rt6i_dev
->ifindex
== oif
))
248 if (rt6_check_expired(sprt
))
251 if (sprt
== rt6_dflt_pointer
)
254 if ((neigh
= sprt
->rt6i_nexthop
) != NULL
) {
255 read_lock_bh(&neigh
->lock
);
256 switch (neigh
->nud_state
) {
274 read_unlock_bh(&neigh
->lock
);
277 read_unlock_bh(&neigh
->lock
);
282 if (m
> mpri
|| m
>= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
296 spin_lock(&rt6_dflt_lock
);
299 * No default routers are known to be reachable.
302 if (rt6_dflt_pointer
) {
303 for (sprt
= rt6_dflt_pointer
->u
.next
;
304 sprt
; sprt
= sprt
->u
.next
) {
305 if (sprt
->u
.dst
.obsolete
<= 0 &&
306 sprt
->u
.dst
.error
== 0 &&
307 !rt6_check_expired(sprt
)) {
314 sprt
= sprt
->u
.next
) {
315 if (sprt
->u
.dst
.obsolete
<= 0 &&
316 sprt
->u
.dst
.error
== 0 &&
317 !rt6_check_expired(sprt
)) {
321 if (sprt
== rt6_dflt_pointer
)
328 if (rt6_dflt_pointer
!= match
)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer
, match
);
331 rt6_dflt_pointer
= match
;
333 spin_unlock(&rt6_dflt_lock
);
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
341 for (sprt
= ip6_routing_table
.leaf
;
342 sprt
; sprt
= sprt
->u
.next
) {
343 if (!rt6_check_expired(sprt
) &&
344 (sprt
->rt6i_flags
& RTF_DEFAULT
) &&
347 sprt
->rt6i_dev
->ifindex
== oif
))) {
353 /* no default route. give up. */
354 match
= &ip6_null_entry
;
361 struct rt6_info
*rt6_lookup(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
364 struct fib6_node
*fn
;
367 read_lock_bh(&rt6_lock
);
368 fn
= fib6_lookup(&ip6_routing_table
, daddr
, saddr
);
369 rt
= rt6_device_match(fn
->leaf
, oif
, strict
);
370 dst_hold(&rt
->u
.dst
);
372 read_unlock_bh(&rt6_lock
);
374 rt
->u
.dst
.lastuse
= jiffies
;
375 if (rt
->u
.dst
.error
== 0)
377 dst_release(&rt
->u
.dst
);
381 /* ip6_ins_rt is called with FREE rt6_lock.
382 It takes new route entry, the addition fails by any reason the
383 route is freed. In any case, if caller does not hold it, it may
387 int ip6_ins_rt(struct rt6_info
*rt
, struct nlmsghdr
*nlh
,
388 void *_rtattr
, struct netlink_skb_parms
*req
)
392 write_lock_bh(&rt6_lock
);
393 err
= fib6_add(&ip6_routing_table
, rt
, nlh
, _rtattr
, req
);
394 write_unlock_bh(&rt6_lock
);
399 /* No rt6_lock! If COW failed, the function returns dead route entry
400 with dst->error set to errno value.
403 static struct rt6_info
*rt6_cow(struct rt6_info
*ort
, struct in6_addr
*daddr
,
404 struct in6_addr
*saddr
, struct netlink_skb_parms
*req
)
413 rt
= ip6_rt_copy(ort
);
416 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
418 if (!(rt
->rt6i_flags
&RTF_GATEWAY
))
419 ipv6_addr_copy(&rt
->rt6i_gateway
, daddr
);
421 rt
->rt6i_dst
.plen
= 128;
422 rt
->rt6i_flags
|= RTF_CACHE
;
423 rt
->u
.dst
.flags
|= DST_HOST
;
425 #ifdef CONFIG_IPV6_SUBTREES
426 if (rt
->rt6i_src
.plen
&& saddr
) {
427 ipv6_addr_copy(&rt
->rt6i_src
.addr
, saddr
);
428 rt
->rt6i_src
.plen
= 128;
432 rt
->rt6i_nexthop
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
434 dst_hold(&rt
->u
.dst
);
436 err
= ip6_ins_rt(rt
, NULL
, NULL
, req
);
440 rt
->u
.dst
.error
= err
;
444 dst_hold(&ip6_null_entry
.u
.dst
);
445 return &ip6_null_entry
;
448 #define BACKTRACK() \
449 if (rt == &ip6_null_entry && strict) { \
450 while ((fn = fn->parent) != NULL) { \
451 if (fn->fn_flags & RTN_ROOT) { \
452 dst_hold(&rt->u.dst); \
455 if (fn->fn_flags & RTN_RTINFO) \
461 void ip6_route_input(struct sk_buff
*skb
)
463 struct fib6_node
*fn
;
468 strict
= ipv6_addr_type(&skb
->nh
.ipv6h
->daddr
) & (IPV6_ADDR_MULTICAST
|IPV6_ADDR_LINKLOCAL
);
471 read_lock_bh(&rt6_lock
);
473 fn
= fib6_lookup(&ip6_routing_table
, &skb
->nh
.ipv6h
->daddr
,
474 &skb
->nh
.ipv6h
->saddr
);
479 if ((rt
->rt6i_flags
& RTF_CACHE
)) {
480 rt
= rt6_device_match(rt
, skb
->dev
->ifindex
, strict
);
482 dst_hold(&rt
->u
.dst
);
486 rt
= rt6_device_match(rt
, skb
->dev
->ifindex
, 0);
489 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
490 struct rt6_info
*nrt
;
491 dst_hold(&rt
->u
.dst
);
492 read_unlock_bh(&rt6_lock
);
494 nrt
= rt6_cow(rt
, &skb
->nh
.ipv6h
->daddr
,
495 &skb
->nh
.ipv6h
->saddr
,
498 dst_release(&rt
->u
.dst
);
501 if (rt
->u
.dst
.error
!= -EEXIST
|| --attempts
<= 0)
504 /* Race condition! In the gap, when rt6_lock was
505 released someone could insert this route. Relookup.
507 dst_release(&rt
->u
.dst
);
510 dst_hold(&rt
->u
.dst
);
513 read_unlock_bh(&rt6_lock
);
515 rt
->u
.dst
.lastuse
= jiffies
;
517 skb
->dst
= (struct dst_entry
*) rt
;
520 struct dst_entry
* ip6_route_output(struct sock
*sk
, struct flowi
*fl
)
522 struct fib6_node
*fn
;
527 strict
= ipv6_addr_type(&fl
->fl6_dst
) & (IPV6_ADDR_MULTICAST
|IPV6_ADDR_LINKLOCAL
);
530 read_lock_bh(&rt6_lock
);
532 fn
= fib6_lookup(&ip6_routing_table
, &fl
->fl6_dst
, &fl
->fl6_src
);
537 if ((rt
->rt6i_flags
& RTF_CACHE
)) {
538 rt
= rt6_device_match(rt
, fl
->oif
, strict
);
540 dst_hold(&rt
->u
.dst
);
543 if (rt
->rt6i_flags
& RTF_DEFAULT
) {
544 if (rt
->rt6i_metric
>= IP6_RT_PRIO_ADDRCONF
)
545 rt
= rt6_best_dflt(rt
, fl
->oif
);
547 rt
= rt6_device_match(rt
, fl
->oif
, strict
);
551 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
552 struct rt6_info
*nrt
;
553 dst_hold(&rt
->u
.dst
);
554 read_unlock_bh(&rt6_lock
);
556 nrt
= rt6_cow(rt
, &fl
->fl6_dst
, &fl
->fl6_src
, NULL
);
558 dst_release(&rt
->u
.dst
);
561 if (rt
->u
.dst
.error
!= -EEXIST
|| --attempts
<= 0)
564 /* Race condition! In the gap, when rt6_lock was
565 released someone could insert this route. Relookup.
567 dst_release(&rt
->u
.dst
);
570 dst_hold(&rt
->u
.dst
);
573 read_unlock_bh(&rt6_lock
);
575 rt
->u
.dst
.lastuse
= jiffies
;
582 * Destination cache support functions
585 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
589 rt
= (struct rt6_info
*) dst
;
591 if (rt
&& rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
))
597 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
599 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
602 if (rt
->rt6i_flags
& RTF_CACHE
)
603 ip6_del_rt(rt
, NULL
, NULL
, NULL
);
610 static void ip6_link_failure(struct sk_buff
*skb
)
614 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0, skb
->dev
);
616 rt
= (struct rt6_info
*) skb
->dst
;
618 if (rt
->rt6i_flags
&RTF_CACHE
) {
619 dst_set_expires(&rt
->u
.dst
, 0);
620 rt
->rt6i_flags
|= RTF_EXPIRES
;
621 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
622 rt
->rt6i_node
->fn_sernum
= -1;
626 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
628 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
630 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
631 rt6
->rt6i_flags
|= RTF_MODIFIED
;
632 if (mtu
< IPV6_MIN_MTU
) {
634 dst
->metrics
[RTAX_FEATURES
-1] |= RTAX_FEATURE_ALLFRAG
;
636 dst
->metrics
[RTAX_MTU
-1] = mtu
;
640 /* Protected by rt6_lock. */
641 static struct dst_entry
*ndisc_dst_gc_list
;
642 static int ipv6_get_mtu(struct net_device
*dev
);
644 static inline unsigned int ipv6_advmss(unsigned int mtu
)
646 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
648 if (mtu
< ip6_rt_min_advmss
)
649 mtu
= ip6_rt_min_advmss
;
652 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
653 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
654 * IPV6_MAXPLEN is also valid and means: "any MSS,
655 * rely only on pmtu discovery"
657 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
662 struct dst_entry
*ndisc_dst_alloc(struct net_device
*dev
,
663 struct neighbour
*neigh
,
664 struct in6_addr
*addr
,
665 int (*output
)(struct sk_buff
*))
668 struct inet6_dev
*idev
= in6_dev_get(dev
);
670 if (unlikely(idev
== NULL
))
673 rt
= ip6_dst_alloc();
674 if (unlikely(rt
== NULL
)) {
683 neigh
= ndisc_get_neigh(dev
, addr
);
686 rt
->rt6i_idev
= idev
;
687 rt
->rt6i_nexthop
= neigh
;
688 atomic_set(&rt
->u
.dst
.__refcnt
, 1);
689 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = 255;
690 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(rt
->rt6i_dev
);
691 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&rt
->u
.dst
));
692 rt
->u
.dst
.output
= output
;
694 #if 0 /* there's no chance to use these for ndisc */
695 rt
->u
.dst
.flags
= ipv6_addr_type(addr
) & IPV6_ADDR_UNICAST
698 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
699 rt
->rt6i_dst
.plen
= 128;
702 write_lock_bh(&rt6_lock
);
703 rt
->u
.dst
.next
= ndisc_dst_gc_list
;
704 ndisc_dst_gc_list
= &rt
->u
.dst
;
705 write_unlock_bh(&rt6_lock
);
707 fib6_force_start_gc();
710 return (struct dst_entry
*)rt
;
713 int ndisc_dst_gc(int *more
)
715 struct dst_entry
*dst
, *next
, **pprev
;
719 pprev
= &ndisc_dst_gc_list
;
721 while ((dst
= *pprev
) != NULL
) {
722 if (!atomic_read(&dst
->__refcnt
)) {
735 static int ip6_dst_gc(void)
737 static unsigned expire
= 30*HZ
;
738 static unsigned long last_gc
;
739 unsigned long now
= jiffies
;
741 if (time_after(last_gc
+ ip6_rt_gc_min_interval
, now
) &&
742 atomic_read(&ip6_dst_ops
.entries
) <= ip6_rt_max_size
)
748 if (atomic_read(&ip6_dst_ops
.entries
) < ip6_dst_ops
.gc_thresh
)
749 expire
= ip6_rt_gc_timeout
>>1;
752 expire
-= expire
>>ip6_rt_gc_elasticity
;
753 return (atomic_read(&ip6_dst_ops
.entries
) > ip6_rt_max_size
);
756 /* Clean host part of a prefix. Not necessary in radix tree,
757 but results in cleaner routing tables.
759 Remove it only when all the things will work!
762 static int ipv6_get_mtu(struct net_device
*dev
)
764 int mtu
= IPV6_MIN_MTU
;
765 struct inet6_dev
*idev
;
767 idev
= in6_dev_get(dev
);
769 mtu
= idev
->cnf
.mtu6
;
775 int ipv6_get_hoplimit(struct net_device
*dev
)
777 int hoplimit
= ipv6_devconf
.hop_limit
;
778 struct inet6_dev
*idev
;
780 idev
= in6_dev_get(dev
);
782 hoplimit
= idev
->cnf
.hop_limit
;
792 int ip6_route_add(struct in6_rtmsg
*rtmsg
, struct nlmsghdr
*nlh
,
793 void *_rtattr
, struct netlink_skb_parms
*req
)
798 struct rt6_info
*rt
= NULL
;
799 struct net_device
*dev
= NULL
;
800 struct inet6_dev
*idev
= NULL
;
803 rta
= (struct rtattr
**) _rtattr
;
805 if (rtmsg
->rtmsg_dst_len
> 128 || rtmsg
->rtmsg_src_len
> 128)
807 #ifndef CONFIG_IPV6_SUBTREES
808 if (rtmsg
->rtmsg_src_len
)
811 if (rtmsg
->rtmsg_ifindex
) {
813 dev
= dev_get_by_index(rtmsg
->rtmsg_ifindex
);
816 idev
= in6_dev_get(dev
);
821 if (rtmsg
->rtmsg_metric
== 0)
822 rtmsg
->rtmsg_metric
= IP6_RT_PRIO_USER
;
824 rt
= ip6_dst_alloc();
831 rt
->u
.dst
.obsolete
= -1;
832 rt
->rt6i_expires
= clock_t_to_jiffies(rtmsg
->rtmsg_info
);
833 if (nlh
&& (r
= NLMSG_DATA(nlh
))) {
834 rt
->rt6i_protocol
= r
->rtm_protocol
;
836 rt
->rt6i_protocol
= RTPROT_BOOT
;
839 addr_type
= ipv6_addr_type(&rtmsg
->rtmsg_dst
);
841 if (addr_type
& IPV6_ADDR_MULTICAST
)
842 rt
->u
.dst
.input
= ip6_mc_input
;
844 rt
->u
.dst
.input
= ip6_forward
;
846 rt
->u
.dst
.output
= ip6_output
;
848 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
,
849 &rtmsg
->rtmsg_dst
, rtmsg
->rtmsg_dst_len
);
850 rt
->rt6i_dst
.plen
= rtmsg
->rtmsg_dst_len
;
851 if (rt
->rt6i_dst
.plen
== 128)
852 rt
->u
.dst
.flags
= DST_HOST
;
854 #ifdef CONFIG_IPV6_SUBTREES
855 ipv6_addr_prefix(&rt
->rt6i_src
.addr
,
856 &rtmsg
->rtmsg_src
, rtmsg
->rtmsg_src_len
);
857 rt
->rt6i_src
.plen
= rtmsg
->rtmsg_src_len
;
860 rt
->rt6i_metric
= rtmsg
->rtmsg_metric
;
862 /* We cannot add true routes via loopback here,
863 they would result in kernel looping; promote them to reject routes
865 if ((rtmsg
->rtmsg_flags
&RTF_REJECT
) ||
866 (dev
&& (dev
->flags
&IFF_LOOPBACK
) && !(addr_type
&IPV6_ADDR_LOOPBACK
))) {
867 /* hold loopback dev/idev if we haven't done so. */
868 if (dev
!= &loopback_dev
) {
875 idev
= in6_dev_get(dev
);
881 rt
->u
.dst
.output
= ip6_pkt_discard_out
;
882 rt
->u
.dst
.input
= ip6_pkt_discard
;
883 rt
->u
.dst
.error
= -ENETUNREACH
;
884 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
888 if (rtmsg
->rtmsg_flags
& RTF_GATEWAY
) {
889 struct in6_addr
*gw_addr
;
892 gw_addr
= &rtmsg
->rtmsg_gateway
;
893 ipv6_addr_copy(&rt
->rt6i_gateway
, &rtmsg
->rtmsg_gateway
);
894 gwa_type
= ipv6_addr_type(gw_addr
);
896 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
897 struct rt6_info
*grt
;
899 /* IPv6 strictly inhibits using not link-local
900 addresses as nexthop address.
901 Otherwise, router will not able to send redirects.
902 It is very good, but in some (rare!) circumstances
903 (SIT, PtP, NBMA NOARP links) it is handy to allow
904 some exceptions. --ANK
907 if (!(gwa_type
&IPV6_ADDR_UNICAST
))
910 grt
= rt6_lookup(gw_addr
, NULL
, rtmsg
->rtmsg_ifindex
, 1);
916 if (dev
!= grt
->rt6i_dev
) {
917 dst_release(&grt
->u
.dst
);
922 idev
= grt
->rt6i_idev
;
924 in6_dev_hold(grt
->rt6i_idev
);
926 if (!(grt
->rt6i_flags
&RTF_GATEWAY
))
928 dst_release(&grt
->u
.dst
);
934 if (dev
== NULL
|| (dev
->flags
&IFF_LOOPBACK
))
942 if (rtmsg
->rtmsg_flags
& (RTF_GATEWAY
|RTF_NONEXTHOP
)) {
943 rt
->rt6i_nexthop
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
944 if (IS_ERR(rt
->rt6i_nexthop
)) {
945 err
= PTR_ERR(rt
->rt6i_nexthop
);
946 rt
->rt6i_nexthop
= NULL
;
951 rt
->rt6i_flags
= rtmsg
->rtmsg_flags
;
954 if (rta
&& rta
[RTA_METRICS
-1]) {
955 int attrlen
= RTA_PAYLOAD(rta
[RTA_METRICS
-1]);
956 struct rtattr
*attr
= RTA_DATA(rta
[RTA_METRICS
-1]);
958 while (RTA_OK(attr
, attrlen
)) {
959 unsigned flavor
= attr
->rta_type
;
961 if (flavor
> RTAX_MAX
) {
965 rt
->u
.dst
.metrics
[flavor
-1] =
966 *(u32
*)RTA_DATA(attr
);
968 attr
= RTA_NEXT(attr
, attrlen
);
972 if (rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] == 0)
973 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = -1;
974 if (!rt
->u
.dst
.metrics
[RTAX_MTU
-1])
975 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(dev
);
976 if (!rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1])
977 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&rt
->u
.dst
));
979 rt
->rt6i_idev
= idev
;
980 return ip6_ins_rt(rt
, nlh
, _rtattr
, req
);
988 dst_free((struct dst_entry
*) rt
);
992 int ip6_del_rt(struct rt6_info
*rt
, struct nlmsghdr
*nlh
, void *_rtattr
, struct netlink_skb_parms
*req
)
996 write_lock_bh(&rt6_lock
);
998 rt6_reset_dflt_pointer(NULL
);
1000 err
= fib6_del(rt
, nlh
, _rtattr
, req
);
1001 dst_release(&rt
->u
.dst
);
1003 write_unlock_bh(&rt6_lock
);
1008 static int ip6_route_del(struct in6_rtmsg
*rtmsg
, struct nlmsghdr
*nlh
, void *_rtattr
, struct netlink_skb_parms
*req
)
1010 struct fib6_node
*fn
;
1011 struct rt6_info
*rt
;
1014 read_lock_bh(&rt6_lock
);
1016 fn
= fib6_locate(&ip6_routing_table
,
1017 &rtmsg
->rtmsg_dst
, rtmsg
->rtmsg_dst_len
,
1018 &rtmsg
->rtmsg_src
, rtmsg
->rtmsg_src_len
);
1021 for (rt
= fn
->leaf
; rt
; rt
= rt
->u
.next
) {
1022 if (rtmsg
->rtmsg_ifindex
&&
1023 (rt
->rt6i_dev
== NULL
||
1024 rt
->rt6i_dev
->ifindex
!= rtmsg
->rtmsg_ifindex
))
1026 if (rtmsg
->rtmsg_flags
&RTF_GATEWAY
&&
1027 !ipv6_addr_equal(&rtmsg
->rtmsg_gateway
, &rt
->rt6i_gateway
))
1029 if (rtmsg
->rtmsg_metric
&&
1030 rtmsg
->rtmsg_metric
!= rt
->rt6i_metric
)
1032 dst_hold(&rt
->u
.dst
);
1033 read_unlock_bh(&rt6_lock
);
1035 return ip6_del_rt(rt
, nlh
, _rtattr
, req
);
1038 read_unlock_bh(&rt6_lock
);
1046 void rt6_redirect(struct in6_addr
*dest
, struct in6_addr
*saddr
,
1047 struct neighbour
*neigh
, u8
*lladdr
, int on_link
)
1049 struct rt6_info
*rt
, *nrt
;
1051 /* Locate old route to this destination. */
1052 rt
= rt6_lookup(dest
, NULL
, neigh
->dev
->ifindex
, 1);
1057 if (neigh
->dev
!= rt
->rt6i_dev
)
1061 * Current route is on-link; redirect is always invalid.
1063 * Seems, previous statement is not true. It could
1064 * be node, which looks for us as on-link (f.e. proxy ndisc)
1065 * But then router serving it might decide, that we should
1066 * know truth 8)8) --ANK (980726).
1068 if (!(rt
->rt6i_flags
&RTF_GATEWAY
))
1072 * RFC 2461 specifies that redirects should only be
1073 * accepted if they come from the nexthop to the target.
1074 * Due to the way default routers are chosen, this notion
1075 * is a bit fuzzy and one might need to check all default
1078 if (!ipv6_addr_equal(saddr
, &rt
->rt6i_gateway
)) {
1079 if (rt
->rt6i_flags
& RTF_DEFAULT
) {
1080 struct rt6_info
*rt1
;
1082 read_lock(&rt6_lock
);
1083 for (rt1
= ip6_routing_table
.leaf
; rt1
; rt1
= rt1
->u
.next
) {
1084 if (ipv6_addr_equal(saddr
, &rt1
->rt6i_gateway
)) {
1085 dst_hold(&rt1
->u
.dst
);
1086 dst_release(&rt
->u
.dst
);
1087 read_unlock(&rt6_lock
);
1092 read_unlock(&rt6_lock
);
1094 if (net_ratelimit())
1095 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
1096 "for redirect target\n");
1103 * We have finally decided to accept it.
1106 neigh_update(neigh
, lladdr
, NUD_STALE
,
1107 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1108 NEIGH_UPDATE_F_OVERRIDE
|
1109 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1110 NEIGH_UPDATE_F_ISROUTER
))
1114 * Redirect received -> path was valid.
1115 * Look, redirects are sent only in response to data packets,
1116 * so that this nexthop apparently is reachable. --ANK
1118 dst_confirm(&rt
->u
.dst
);
1120 /* Duplicate redirect: silently ignore. */
1121 if (neigh
== rt
->u
.dst
.neighbour
)
1124 nrt
= ip6_rt_copy(rt
);
1128 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1130 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1132 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, dest
);
1133 nrt
->rt6i_dst
.plen
= 128;
1134 nrt
->u
.dst
.flags
|= DST_HOST
;
1136 ipv6_addr_copy(&nrt
->rt6i_gateway
, (struct in6_addr
*)neigh
->primary_key
);
1137 nrt
->rt6i_nexthop
= neigh_clone(neigh
);
1138 /* Reset pmtu, it may be better */
1139 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(neigh
->dev
);
1140 nrt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&nrt
->u
.dst
));
1142 if (ip6_ins_rt(nrt
, NULL
, NULL
, NULL
))
1145 if (rt
->rt6i_flags
&RTF_CACHE
) {
1146 ip6_del_rt(rt
, NULL
, NULL
, NULL
);
1151 dst_release(&rt
->u
.dst
);
1156 * Handle ICMP "packet too big" messages
1157 * i.e. Path MTU discovery
1160 void rt6_pmtu_discovery(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
1161 struct net_device
*dev
, u32 pmtu
)
1163 struct rt6_info
*rt
, *nrt
;
1166 rt
= rt6_lookup(daddr
, saddr
, dev
->ifindex
, 0);
1170 if (pmtu
>= dst_mtu(&rt
->u
.dst
))
1173 if (pmtu
< IPV6_MIN_MTU
) {
1175 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1176 * MTU (1280) and a fragment header should always be included
1177 * after a node receiving Too Big message reporting PMTU is
1178 * less than the IPv6 Minimum Link MTU.
1180 pmtu
= IPV6_MIN_MTU
;
1184 /* New mtu received -> path was valid.
1185 They are sent only in response to data packets,
1186 so that this nexthop apparently is reachable. --ANK
1188 dst_confirm(&rt
->u
.dst
);
1190 /* Host route. If it is static, it would be better
1191 not to override it, but add new one, so that
1192 when cache entry will expire old pmtu
1193 would return automatically.
1195 if (rt
->rt6i_flags
& RTF_CACHE
) {
1196 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1198 rt
->u
.dst
.metrics
[RTAX_FEATURES
-1] |= RTAX_FEATURE_ALLFRAG
;
1199 dst_set_expires(&rt
->u
.dst
, ip6_rt_mtu_expires
);
1200 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
1205 Two cases are possible:
1206 1. It is connected route. Action: COW
1207 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1209 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
1210 nrt
= rt6_cow(rt
, daddr
, saddr
, NULL
);
1211 if (!nrt
->u
.dst
.error
) {
1212 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1214 nrt
->u
.dst
.metrics
[RTAX_FEATURES
-1] |= RTAX_FEATURE_ALLFRAG
;
1215 /* According to RFC 1981, detecting PMTU increase shouldn't be
1216 happened within 5 mins, the recommended timer is 10 mins.
1217 Here this route expiration time is set to ip6_rt_mtu_expires
1218 which is 10 mins. After 10 mins the decreased pmtu is expired
1219 and detecting PMTU increase will be automatically happened.
1221 dst_set_expires(&nrt
->u
.dst
, ip6_rt_mtu_expires
);
1222 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
1224 dst_release(&nrt
->u
.dst
);
1226 nrt
= ip6_rt_copy(rt
);
1229 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, daddr
);
1230 nrt
->rt6i_dst
.plen
= 128;
1231 nrt
->u
.dst
.flags
|= DST_HOST
;
1232 nrt
->rt6i_nexthop
= neigh_clone(rt
->rt6i_nexthop
);
1233 dst_set_expires(&nrt
->u
.dst
, ip6_rt_mtu_expires
);
1234 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_CACHE
|RTF_EXPIRES
;
1235 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1237 nrt
->u
.dst
.metrics
[RTAX_FEATURES
-1] |= RTAX_FEATURE_ALLFRAG
;
1238 ip6_ins_rt(nrt
, NULL
, NULL
, NULL
);
1242 dst_release(&rt
->u
.dst
);
1246 * Misc support functions
1249 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
)
1251 struct rt6_info
*rt
= ip6_dst_alloc();
1254 rt
->u
.dst
.input
= ort
->u
.dst
.input
;
1255 rt
->u
.dst
.output
= ort
->u
.dst
.output
;
1257 memcpy(rt
->u
.dst
.metrics
, ort
->u
.dst
.metrics
, RTAX_MAX
*sizeof(u32
));
1258 rt
->u
.dst
.dev
= ort
->u
.dst
.dev
;
1260 dev_hold(rt
->u
.dst
.dev
);
1261 rt
->rt6i_idev
= ort
->rt6i_idev
;
1263 in6_dev_hold(rt
->rt6i_idev
);
1264 rt
->u
.dst
.lastuse
= jiffies
;
1265 rt
->rt6i_expires
= 0;
1267 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
1268 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1269 rt
->rt6i_metric
= 0;
1271 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1272 #ifdef CONFIG_IPV6_SUBTREES
1273 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1279 struct rt6_info
*rt6_get_dflt_router(struct in6_addr
*addr
, struct net_device
*dev
)
1281 struct rt6_info
*rt
;
1282 struct fib6_node
*fn
;
1284 fn
= &ip6_routing_table
;
1286 write_lock_bh(&rt6_lock
);
1287 for (rt
= fn
->leaf
; rt
; rt
=rt
->u
.next
) {
1288 if (dev
== rt
->rt6i_dev
&&
1289 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1293 dst_hold(&rt
->u
.dst
);
1294 write_unlock_bh(&rt6_lock
);
1298 struct rt6_info
*rt6_add_dflt_router(struct in6_addr
*gwaddr
,
1299 struct net_device
*dev
)
1301 struct in6_rtmsg rtmsg
;
1303 memset(&rtmsg
, 0, sizeof(struct in6_rtmsg
));
1304 rtmsg
.rtmsg_type
= RTMSG_NEWROUTE
;
1305 ipv6_addr_copy(&rtmsg
.rtmsg_gateway
, gwaddr
);
1306 rtmsg
.rtmsg_metric
= 1024;
1307 rtmsg
.rtmsg_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
| RTF_UP
| RTF_EXPIRES
;
1309 rtmsg
.rtmsg_ifindex
= dev
->ifindex
;
1311 ip6_route_add(&rtmsg
, NULL
, NULL
, NULL
);
1312 return rt6_get_dflt_router(gwaddr
, dev
);
1315 void rt6_purge_dflt_routers(void)
1317 struct rt6_info
*rt
;
1320 read_lock_bh(&rt6_lock
);
1321 for (rt
= ip6_routing_table
.leaf
; rt
; rt
= rt
->u
.next
) {
1322 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1323 dst_hold(&rt
->u
.dst
);
1325 rt6_reset_dflt_pointer(NULL
);
1327 read_unlock_bh(&rt6_lock
);
1329 ip6_del_rt(rt
, NULL
, NULL
, NULL
);
1334 read_unlock_bh(&rt6_lock
);
1337 int ipv6_route_ioctl(unsigned int cmd
, void __user
*arg
)
1339 struct in6_rtmsg rtmsg
;
1343 case SIOCADDRT
: /* Add a route */
1344 case SIOCDELRT
: /* Delete a route */
1345 if (!capable(CAP_NET_ADMIN
))
1347 err
= copy_from_user(&rtmsg
, arg
,
1348 sizeof(struct in6_rtmsg
));
1355 err
= ip6_route_add(&rtmsg
, NULL
, NULL
, NULL
);
1358 err
= ip6_route_del(&rtmsg
, NULL
, NULL
, NULL
);
1372 * Drop the packet on the floor
1375 static int ip6_pkt_discard(struct sk_buff
*skb
)
1377 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES
);
1378 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_NOROUTE
, 0, skb
->dev
);
1383 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
1385 skb
->dev
= skb
->dst
->dev
;
1386 return ip6_pkt_discard(skb
);
1390 * Allocate a dst for local (unicast / anycast) address.
1393 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
1394 const struct in6_addr
*addr
,
1397 struct rt6_info
*rt
= ip6_dst_alloc();
1400 return ERR_PTR(-ENOMEM
);
1402 dev_hold(&loopback_dev
);
1405 rt
->u
.dst
.flags
= DST_HOST
;
1406 rt
->u
.dst
.input
= ip6_input
;
1407 rt
->u
.dst
.output
= ip6_output
;
1408 rt
->rt6i_dev
= &loopback_dev
;
1409 rt
->rt6i_idev
= idev
;
1410 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(rt
->rt6i_dev
);
1411 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&rt
->u
.dst
));
1412 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = -1;
1413 rt
->u
.dst
.obsolete
= -1;
1415 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
1417 rt
->rt6i_flags
|= RTF_LOCAL
;
1418 rt
->rt6i_nexthop
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
1419 if (rt
->rt6i_nexthop
== NULL
) {
1420 dst_free((struct dst_entry
*) rt
);
1421 return ERR_PTR(-ENOMEM
);
1424 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1425 rt
->rt6i_dst
.plen
= 128;
1427 atomic_set(&rt
->u
.dst
.__refcnt
, 1);
1432 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
1434 if (((void*)rt
->rt6i_dev
== arg
|| arg
== NULL
) &&
1435 rt
!= &ip6_null_entry
) {
1436 RT6_TRACE("deleted by ifdown %p\n", rt
);
1442 void rt6_ifdown(struct net_device
*dev
)
1444 write_lock_bh(&rt6_lock
);
1445 fib6_clean_tree(&ip6_routing_table
, fib6_ifdown
, 0, dev
);
1446 write_unlock_bh(&rt6_lock
);
1449 struct rt6_mtu_change_arg
1451 struct net_device
*dev
;
1455 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
1457 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
1458 struct inet6_dev
*idev
;
1460 /* In IPv6 pmtu discovery is not optional,
1461 so that RTAX_MTU lock cannot disable it.
1462 We still use this lock to block changes
1463 caused by addrconf/ndisc.
1466 idev
= __in6_dev_get(arg
->dev
);
1470 /* For administrative MTU increase, there is no way to discover
1471 IPv6 PMTU increase, so PMTU increase should be updated here.
1472 Since RFC 1981 doesn't include administrative MTU increase
1473 update PMTU increase is a MUST. (i.e. jumbo frame)
1476 If new MTU is less than route PMTU, this new MTU will be the
1477 lowest MTU in the path, update the route PMTU to reflect PMTU
1478 decreases; if new MTU is greater than route PMTU, and the
1479 old MTU is the lowest MTU in the path, update the route PMTU
1480 to reflect the increase. In this case if the other nodes' MTU
1481 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1484 if (rt
->rt6i_dev
== arg
->dev
&&
1485 !dst_metric_locked(&rt
->u
.dst
, RTAX_MTU
) &&
1486 (dst_mtu(&rt
->u
.dst
) > arg
->mtu
||
1487 (dst_mtu(&rt
->u
.dst
) < arg
->mtu
&&
1488 dst_mtu(&rt
->u
.dst
) == idev
->cnf
.mtu6
)))
1489 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = arg
->mtu
;
1490 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(arg
->mtu
);
/*
 * rt6_mtu_change - propagate a new MTU for @dev through the IPv6 routing
 * table.
 *
 * @dev: device whose MTU changed
 * @mtu: the new MTU
 *
 * Walks ip6_routing_table with rt6_mtu_change_route() as the per-route
 * callback, between read_lock_bh(&rt6_lock) and read_unlock_bh(&rt6_lock).
 * NOTE(review): the statements that fill 'arg' from @dev and @mtu are not
 * visible in this extract.
 */
1494 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
1496 struct rt6_mtu_change_arg arg
;
1500 read_lock_bh(&rt6_lock
);
1501 fib6_clean_tree(&ip6_routing_table
, rt6_mtu_change_route
, 0, &arg
);
1502 read_unlock_bh(&rt6_lock
);
/*
 * inet6_rtm_to_rtmsg - translate an rtnetlink route request into the
 * struct in6_rtmsg form taken by ip6_route_add()/ip6_route_del().
 *
 * @r:     rtnetlink route header of the request
 * @rta:   attribute pointer array, indexed by (attribute type - 1);
 *         absent attributes are NULL
 * @rtmsg: output; zeroed first, then filled in
 *
 * Copies the destination/source prefix lengths, sets RTF_UP, and adds
 * RTF_REJECT when the request type is RTN_UNREACHABLE.  Each of
 * RTA_GATEWAY, RTA_DST, RTA_SRC, RTA_OIF and RTA_PRIORITY is length-tested
 * before its payload is copied.
 *
 * NOTE(review): the statements taken when a length test fails, and the
 * final return, are not visible in this extract.  Callers treat a
 * non-zero result as failure.
 */
1505 static int inet6_rtm_to_rtmsg(struct rtmsg
*r
, struct rtattr
**rta
,
1506 struct in6_rtmsg
*rtmsg
)
1508 memset(rtmsg
, 0, sizeof(*rtmsg
));
1510 rtmsg
->rtmsg_dst_len
= r
->rtm_dst_len
;
1511 rtmsg
->rtmsg_src_len
= r
->rtm_src_len
;
1512 rtmsg
->rtmsg_flags
= RTF_UP
;
1513 if (r
->rtm_type
== RTN_UNREACHABLE
)
1514 rtmsg
->rtmsg_flags
|= RTF_REJECT
;
/* Gateway: must be exactly a 16-byte payload; marks the route RTF_GATEWAY. */
1516 if (rta
[RTA_GATEWAY
-1]) {
1517 if (rta
[RTA_GATEWAY
-1]->rta_len
!= RTA_LENGTH(16))
1519 memcpy(&rtmsg
->rtmsg_gateway
, RTA_DATA(rta
[RTA_GATEWAY
-1]), 16);
1520 rtmsg
->rtmsg_flags
|= RTF_GATEWAY
;
/*
 * Destination / source prefixes: only the prefix length rounded up to
 * whole bytes is required from, and copied out of, the attribute.
 */
1522 if (rta
[RTA_DST
-1]) {
1523 if (RTA_PAYLOAD(rta
[RTA_DST
-1]) < ((r
->rtm_dst_len
+7)>>3))
1525 memcpy(&rtmsg
->rtmsg_dst
, RTA_DATA(rta
[RTA_DST
-1]), ((r
->rtm_dst_len
+7)>>3));
1527 if (rta
[RTA_SRC
-1]) {
1528 if (RTA_PAYLOAD(rta
[RTA_SRC
-1]) < ((r
->rtm_src_len
+7)>>3))
1530 memcpy(&rtmsg
->rtmsg_src
, RTA_DATA(rta
[RTA_SRC
-1]), ((r
->rtm_src_len
+7)>>3));
/* Output interface index and metric: fixed-size integer payloads. */
1532 if (rta
[RTA_OIF
-1]) {
1533 if (rta
[RTA_OIF
-1]->rta_len
!= RTA_LENGTH(sizeof(int)))
1535 memcpy(&rtmsg
->rtmsg_ifindex
, RTA_DATA(rta
[RTA_OIF
-1]), sizeof(int));
1537 if (rta
[RTA_PRIORITY
-1]) {
1538 if (rta
[RTA_PRIORITY
-1]->rta_len
!= RTA_LENGTH(4))
1540 memcpy(&rtmsg
->rtmsg_metric
, RTA_DATA(rta
[RTA_PRIORITY
-1]), 4);
/*
 * inet6_rtm_delroute - rtnetlink entry point that removes an IPv6 route.
 *
 * @skb: request skb; its NETLINK_CB() is forwarded to ip6_route_del()
 * @nlh: netlink header of the request; NLMSG_DATA() is the struct rtmsg
 * @arg: attribute array, passed on as the rtattr table
 *
 * Converts the request with inet6_rtm_to_rtmsg() and returns the result
 * of ip6_route_del().
 * NOTE(review): the statement executed when the conversion fails is not
 * visible in this extract.
 */
1545 int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
1547 struct rtmsg
*r
= NLMSG_DATA(nlh
);
1548 struct in6_rtmsg rtmsg
;
1550 if (inet6_rtm_to_rtmsg(r
, arg
, &rtmsg
))
1552 return ip6_route_del(&rtmsg
, nlh
, arg
, &NETLINK_CB(skb
));
/*
 * inet6_rtm_newroute - rtnetlink entry point that installs an IPv6 route.
 *
 * @skb: request skb; its NETLINK_CB() is forwarded to ip6_route_add()
 * @nlh: netlink header of the request; NLMSG_DATA() is the struct rtmsg
 * @arg: attribute array, passed on as the rtattr table
 *
 * Converts the request with inet6_rtm_to_rtmsg() and returns the result
 * of ip6_route_add().
 * NOTE(review): the statement executed when the conversion fails is not
 * visible in this extract.
 */
1555 int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
1557 struct rtmsg
*r
= NLMSG_DATA(nlh
);
1558 struct in6_rtmsg rtmsg
;
1560 if (inet6_rtm_to_rtmsg(r
, arg
, &rtmsg
))
1562 return ip6_route_add(&rtmsg
, nlh
, arg
, &NETLINK_CB(skb
));
/*
 * Context handed to rt6_dump_route() during a netlink table dump:
 * the reply skb being filled and the dump's netlink callback state.
 */
1565 struct rt6_rtnl_dump_arg
1567 struct sk_buff
*skb
;
1568 struct netlink_callback
*cb
;
/*
 * rt6_fill_node - append one rtnetlink route message describing @rt to
 * the tail of @skb.
 *
 * @skb:    buffer being filled
 * @rt:     route to describe
 * @dst:    explicit destination address to report with length 128, or
 *          NULL to report the route's own prefix
 * @src:    same for the source (only under CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index, emitted as RTA_IIF
 * @type:   netlink message type passed to NLMSG_NEW()
 * @pid:    netlink pid passed to NLMSG_NEW()
 * @seq:    sequence number passed to NLMSG_NEW()
 * @prefix: non-zero restricts output to routes flagged RTF_PREFIX_RT
 * @flags:  netlink message flags passed to NLMSG_NEW()
 *
 * The message type is RTN_UNREACHABLE for RTF_REJECT routes and RTN_LOCAL
 * when the route's device has IFF_LOOPBACK; RTN_UNICAST is the remaining
 * assignment.  Attributes are added with RTA_PUT(), and a failure in
 * rtnetlink_put_metrics() jumps to rtattr_failure.
 *
 * NOTE(review): several conditions (the tests guarding the @dst/@src/@iif
 * branches and the RTA_OIF put), the failure labels and all return
 * statements are not visible in this extract; do not infer the return
 * convention from this listing alone.
 */
1571 static int rt6_fill_node(struct sk_buff
*skb
, struct rt6_info
*rt
,
1572 struct in6_addr
*dst
, struct in6_addr
*src
,
1573 int iif
, int type
, u32 pid
, u32 seq
,
1574 int prefix
, unsigned int flags
)
1577 struct nlmsghdr
*nlh
;
1578 unsigned char *b
= skb
->tail
;
1579 struct rta_cacheinfo ci
;
1581 if (prefix
) { /* user wants prefix routes only */
1582 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
1583 /* success since this is not a prefix route */
1588 nlh
= NLMSG_NEW(skb
, pid
, seq
, type
, sizeof(*rtm
), flags
);
1589 rtm
= NLMSG_DATA(nlh
);
1590 rtm
->rtm_family
= AF_INET6
;
1591 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
1592 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
1594 rtm
->rtm_table
= RT_TABLE_MAIN
;
1595 if (rt
->rt6i_flags
&RTF_REJECT
)
1596 rtm
->rtm_type
= RTN_UNREACHABLE
;
1597 else if (rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
&IFF_LOOPBACK
))
1598 rtm
->rtm_type
= RTN_LOCAL
;
1600 rtm
->rtm_type
= RTN_UNICAST
;
1602 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
/* Start from the stored protocol; route flags below may override it. */
1603 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
1604 if (rt
->rt6i_flags
&RTF_DYNAMIC
)
1605 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
1606 else if (rt
->rt6i_flags
& RTF_ADDRCONF
)
1607 rtm
->rtm_protocol
= RTPROT_KERNEL
;
1608 else if (rt
->rt6i_flags
&RTF_DEFAULT
)
1609 rtm
->rtm_protocol
= RTPROT_RA
;
1611 if (rt
->rt6i_flags
&RTF_CACHE
)
1612 rtm
->rtm_flags
|= RTM_F_CLONED
;
1615 RTA_PUT(skb
, RTA_DST
, 16, dst
);
1616 rtm
->rtm_dst_len
= 128;
1617 } else if (rtm
->rtm_dst_len
)
1618 RTA_PUT(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
);
1619 #ifdef CONFIG_IPV6_SUBTREES
1621 RTA_PUT(skb
, RTA_SRC
, 16, src
);
1622 rtm
->rtm_src_len
= 128;
1623 } else if (rtm
->rtm_src_len
)
1624 RTA_PUT(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
);
1627 RTA_PUT(skb
, RTA_IIF
, 4, &iif
);
1629 struct in6_addr saddr_buf
;
1630 if (ipv6_get_saddr(&rt
->u
.dst
, dst
, &saddr_buf
) == 0)
1631 RTA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
1633 if (rtnetlink_put_metrics(skb
, rt
->u
.dst
.metrics
) < 0)
1634 goto rtattr_failure
;
1635 if (rt
->u
.dst
.neighbour
)
1636 RTA_PUT(skb
, RTA_GATEWAY
, 16, &rt
->u
.dst
.neighbour
->primary_key
);
1638 RTA_PUT(skb
, RTA_OIF
, sizeof(int), &rt
->rt6i_dev
->ifindex
);
1639 RTA_PUT(skb
, RTA_PRIORITY
, 4, &rt
->rt6i_metric
);
/* Cache statistics; jiffies deltas are converted with jiffies_to_clock_t(). */
1640 ci
.rta_lastuse
= jiffies_to_clock_t(jiffies
- rt
->u
.dst
.lastuse
);
1641 if (rt
->rt6i_expires
)
1642 ci
.rta_expires
= jiffies_to_clock_t(rt
->rt6i_expires
- jiffies
);
1645 ci
.rta_used
= rt
->u
.dst
.__use
;
1646 ci
.rta_clntref
= atomic_read(&rt
->u
.dst
.__refcnt
);
1647 ci
.rta_error
= rt
->u
.dst
.error
;
1651 RTA_PUT(skb
, RTA_CACHEINFO
, sizeof(ci
), &ci
);
/* Final message length: everything appended since 'b' was sampled. */
1652 nlh
->nlmsg_len
= skb
->tail
- b
;
/* Roll the skb back to where this message started. */
1657 skb_trim(skb
, b
- skb
->data
);
/*
 * rt6_dump_route - emit one route into an in-progress netlink dump.
 *
 * @rt:    route to emit
 * @p_arg: struct rt6_rtnl_dump_arg (reply skb and dump callback)
 *
 * If the dump request is long enough to carry a struct rtmsg, its
 * RTM_F_PREFIX flag selects prefix-only output.  The route is written by
 * rt6_fill_node() as an RTM_NEWROUTE message flagged NLM_F_MULTI,
 * addressed with the requester's pid and sequence number; that result is
 * returned unchanged.
 * NOTE(review): the declaration of 'prefix' and its value for short
 * requests are not visible in this extract.
 */
1661 static int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
1663 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
1666 if (arg
->cb
->nlh
->nlmsg_len
>= NLMSG_LENGTH(sizeof(struct rtmsg
))) {
1667 struct rtmsg
*rtm
= NLMSG_DATA(arg
->cb
->nlh
);
1668 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
1672 return rt6_fill_node(arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
1673 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
,
1674 prefix
, NLM_F_MULTI
);
/*
 * fib6_dump_node - walker callback (installed as w->func by
 * inet6_dump_fib()) that dumps every route hanging off the current node.
 *
 * @w: tree walker; w->leaf heads the route list (linked through u.next)
 *     and w->args is forwarded to rt6_dump_route().
 *
 * NOTE(review): the handling of 'res' (including the "frame is full"
 * suspension) and the return statements are not visible in this extract.
 */
1677 static int fib6_dump_node(struct fib6_walker_t
*w
)
1680 struct rt6_info
*rt
;
1682 for (rt
= w
->leaf
; rt
; rt
= rt
->u
.next
) {
1683 res
= rt6_dump_route(rt
, w
->args
);
1685 /* Frame is full, suspend walking */
/*
 * fib6_dump_end - tear down the state inet6_dump_fib() attached to @cb.
 *
 * Fetches the walker stored in cb->args[0], unlinks it with
 * fib6_walker_unlink(), and restores the original completion handler that
 * inet6_dump_fib() saved in cb->args[1].
 * NOTE(review): any guards around these steps and the release of the
 * walker's memory are not visible in this extract.
 */
1695 static void fib6_dump_end(struct netlink_callback
*cb
)
1697 struct fib6_walker_t
*w
= (void*)cb
->args
[0];
1701 fib6_walker_unlink(w
);
1705 cb
->done
= (void*)cb
->args
[1];
/*
 * fib6_dump_done - completion hook that inet6_dump_fib() installs as
 * cb->done; chains to the handler currently stored in cb->done.
 * NOTE(review): since this function is itself installed as cb->done, the
 * original handler must be restored before the call below; presumably
 * fib6_dump_end() runs first, but that call is not visible in this
 * extract -- confirm.
 */
1710 static int fib6_dump_done(struct netlink_callback
*cb
)
1713 return cb
->done(cb
);
/*
 * inet6_dump_fib - netlink dump handler for the IPv6 routing table.
 *
 * @skb: reply buffer for this dump round
 * @cb:  dump callback state; args[0] holds the tree walker between
 *       rounds and args[1] holds the caller's original cb->done
 *
 * Setup path: saves cb->done in args[1] and installs fib6_dump_done(),
 * allocates a zeroed walker with GFP_ATOMIC rooted at ip6_routing_table
 * with fib6_dump_node() as its callback, and stores it in args[0].
 * Continuation path: resumes with fib6_walk_continue().  Tree access is
 * bracketed by read_lock_bh/read_unlock_bh on rt6_lock.
 *
 * The value computed at the end is a negative 'res' unchanged, otherwise
 * skb->len.
 * NOTE(review): the branch conditions selecting setup vs. continuation,
 * the allocation-failure handling, the initial walk call and the return
 * statement are not visible in this extract.
 */
1716 int inet6_dump_fib(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1718 struct rt6_rtnl_dump_arg arg
;
1719 struct fib6_walker_t
*w
;
1725 w
= (void*)cb
->args
[0];
1729 * 1. hook callback destructor.
1731 cb
->args
[1] = (long)cb
->done
;
1732 cb
->done
= fib6_dump_done
;
1735 * 2. allocate and initialize walker.
1737 w
= kmalloc(sizeof(*w
), GFP_ATOMIC
);
1740 RT6_TRACE("dump<%p", w
);
1741 memset(w
, 0, sizeof(*w
));
1742 w
->root
= &ip6_routing_table
;
1743 w
->func
= fib6_dump_node
;
1745 cb
->args
[0] = (long)w
;
1746 read_lock_bh(&rt6_lock
);
1748 read_unlock_bh(&rt6_lock
);
1751 read_lock_bh(&rt6_lock
);
1752 res
= fib6_walk_continue(w
);
1753 read_unlock_bh(&rt6_lock
);
1756 if (res
<= 0 && skb
->len
== 0)
1757 RT6_TRACE("%p>dump end\n", w
);
1759 res
= res
< 0 ? res
: skb
->len
;
1760 /* res < 0 is an error. (really, impossible)
1761 res == 0 means that dump is complete, but skb still can contain data.
1762 res > 0 dump is not complete, but frame is full.
1764 /* Destroy walker, if dump of this table is complete. */
/*
 * inet6_rtm_getroute - rtnetlink entry point that looks up a single route
 * and unicasts the answer to the requester.
 *
 * @in_skb: request skb; NETLINK_CB(in_skb).pid identifies the requester
 * @nlh:    netlink header of the request (its nlmsg_seq is echoed)
 * @arg:    attribute array, used as struct rtattr **
 *
 * Allocates a reply skb of NLMSG_GOODSIZE with GFP_KERNEL, reserves
 * MAX_HEADER + sizeof(struct ipv6hdr) of headroom, builds a zeroed flow
 * key from RTA_SRC, RTA_DST, RTA_IIF and RTA_OIF, resolves it with
 * ip6_route_output(), attaches the result as skb->dst, serialises it with
 * rt6_fill_node() as RTM_NEWROUTE, and sends it with netlink_unicast()
 * using MSG_DONTWAIT.
 *
 * NOTE(review): the conditions guarding each attribute copy, every error
 * path and the return statement are not visible in this extract.
 * NOTE(review): the attribute payloads are copied with fixed sizes and no
 * length test is visible here (contrast inet6_rtm_to_rtmsg()) -- confirm
 * they are validated before use.
 */
1770 int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
1772 struct rtattr
**rta
= arg
;
1775 struct sk_buff
*skb
;
1777 struct rt6_info
*rt
;
1779 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1783 /* Reserve room for dummy headers, this skb can pass
1784 through good chunk of routing engine.
1786 skb
->mac
.raw
= skb
->data
;
1787 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
1789 memset(&fl
, 0, sizeof(fl
));
1791 ipv6_addr_copy(&fl
.fl6_src
,
1792 (struct in6_addr
*)RTA_DATA(rta
[RTA_SRC
-1]));
1794 ipv6_addr_copy(&fl
.fl6_dst
,
1795 (struct in6_addr
*)RTA_DATA(rta
[RTA_DST
-1]));
1798 memcpy(&iif
, RTA_DATA(rta
[RTA_IIF
-1]), sizeof(int));
1801 struct net_device
*dev
;
1802 dev
= __dev_get_by_index(iif
);
1811 memcpy(&fl
.oif
, RTA_DATA(rta
[RTA_OIF
-1]), sizeof(int));
1813 rt
= (struct rt6_info
*)ip6_route_output(NULL
, &fl
);
1815 skb
->dst
= &rt
->u
.dst
;
1817 NETLINK_CB(skb
).dst_pid
= NETLINK_CB(in_skb
).pid
;
1818 err
= rt6_fill_node(skb
, rt
,
1819 &fl
.fl6_dst
, &fl
.fl6_src
,
1821 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
,
1822 nlh
->nlmsg_seq
, 0, 0);
1828 err
= netlink_unicast(rtnl
, skb
, NETLINK_CB(in_skb
).pid
, MSG_DONTWAIT
);
/*
 * inet6_rt_notify - broadcast a route change to RTNLGRP_IPV6_ROUTE
 * listeners.
 *
 * @event: netlink message type to announce (passed to rt6_fill_node())
 * @rt:    route the event refers to
 * @nlh:   header of the triggering request; supplies the sequence number
 * @req:   netlink parameters of the triggering request
 *
 * Allocates an skb of NLMSG_SPACE(sizeof(struct rtmsg)+256) bytes with
 * gfp_any(), fills it with rt6_fill_node() and hands it to
 * netlink_broadcast().  If rt6_fill_node() fails, EINVAL is reported to
 * the group via netlink_set_err(); ENOBUFS is reported on the path that
 * follows the allocation.
 *
 * NOTE(review): 'pid' starts as current->pid; how @req and a NULL @nlh
 * are handled, and the exact failure conditions and cleanup, are not
 * visible in this extract.
 */
1838 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nlmsghdr
*nlh
,
1839 struct netlink_skb_parms
*req
)
1841 struct sk_buff
*skb
;
1842 int size
= NLMSG_SPACE(sizeof(struct rtmsg
)+256);
1843 u32 pid
= current
->pid
;
1849 seq
= nlh
->nlmsg_seq
;
1851 skb
= alloc_skb(size
, gfp_any());
1853 netlink_set_err(rtnl
, 0, RTNLGRP_IPV6_ROUTE
, ENOBUFS
);
1856 if (rt6_fill_node(skb
, rt
, NULL
, NULL
, 0, event
, pid
, seq
, 0, 0) < 0) {
1858 netlink_set_err(rtnl
, 0, RTNLGRP_IPV6_ROUTE
, EINVAL
);
1861 NETLINK_CB(skb
).dst_group
= RTNLGRP_IPV6_ROUTE
;
1862 netlink_broadcast(rtnl
, skb
, 0, RTNLGRP_IPV6_ROUTE
, gfp_any());
1869 #ifdef CONFIG_PROC_FS
/*
 * Byte length used as the size of one text record by the ipv6_route proc
 * code: rt6_info_route() divides the read offset by it to decide how many
 * routes to skip, and rt6_proc_info() uses offset % RT6_INFO_LEN to trim a
 * partially consumed record.
 */
1871 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route - per-route callback (see rt6_proc_info()) that formats
 * one route as a text record into the caller's buffer.
 *
 * @rt:    route to print
 * @p_arg: struct rt6_proc_arg carrying buffer, offset, length and the
 *         running skip/len counters
 *
 * Routes are skipped until arg->skip reaches offset / RT6_INFO_LEN, and
 * output is gated by the arg->len >= arg->length test.  A record holds
 * the destination address as 32 hex digits plus a two-digit field, the
 * same pair for the source (a zero placeholder string is also present),
 * the next-hop key as 32 hex digits or zeros, then metric, reference
 * count, use count and flags as %08x and the device name as %8s.
 *
 * NOTE(review): the per-byte advance of arg->len inside the hex loops,
 * the arguments of the " %02x " conversions, the #else/#endif around the
 * source block and all return statements are not visible in this extract.
 * NOTE(review): output uses unbounded sprintf(); the only size guard
 * visible is the arg->len >= arg->length test above -- confirm the buffer
 * always has room for a whole record.
 */
1882 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
1884 struct rt6_proc_arg
*arg
= (struct rt6_proc_arg
*) p_arg
;
1887 if (arg
->skip
< arg
->offset
/ RT6_INFO_LEN
) {
1892 if (arg
->len
>= arg
->length
)
1895 for (i
=0; i
<16; i
++) {
1896 sprintf(arg
->buffer
+ arg
->len
, "%02x",
1897 rt
->rt6i_dst
.addr
.s6_addr
[i
]);
1900 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
, " %02x ",
1903 #ifdef CONFIG_IPV6_SUBTREES
1904 for (i
=0; i
<16; i
++) {
1905 sprintf(arg
->buffer
+ arg
->len
, "%02x",
1906 rt
->rt6i_src
.addr
.s6_addr
[i
]);
1909 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
, " %02x ",
1912 sprintf(arg
->buffer
+ arg
->len
,
1913 "00000000000000000000000000000000 00 ");
1917 if (rt
->rt6i_nexthop
) {
1918 for (i
=0; i
<16; i
++) {
1919 sprintf(arg
->buffer
+ arg
->len
, "%02x",
1920 rt
->rt6i_nexthop
->primary_key
[i
]);
1924 sprintf(arg
->buffer
+ arg
->len
,
1925 "00000000000000000000000000000000");
1928 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
1929 " %08x %08x %08x %08x %8s\n",
1930 rt
->rt6i_metric
, atomic_read(&rt
->u
.dst
.__refcnt
),
1931 rt
->u
.dst
.__use
, rt
->rt6i_flags
,
1932 rt
->rt6i_dev
? rt
->rt6i_dev
->name
: "");
/*
 * rt6_proc_info - read handler registered for the "ipv6_route" proc entry
 * (see ip6_route_init()).
 *
 * @buffer: output buffer
 * @start:  in/out pointer to where the caller's data begins in @buffer
 * @offset: byte offset requested by the reader
 * @length: maximum number of bytes wanted
 *
 * Walks ip6_routing_table with rt6_info_route() between
 * read_lock_bh(&rt6_lock) and read_unlock_bh(&rt6_lock), then advances
 * *start and shrinks the produced length by offset % RT6_INFO_LEN so a
 * record that was partly consumed is not repeated.
 *
 * NOTE(review): the initialisation of arg.skip/arg.len, the initial
 * assignment of *start, the clamping to @length and the return value are
 * not visible in this extract.
 */
1936 static int rt6_proc_info(char *buffer
, char **start
, off_t offset
, int length
)
1938 struct rt6_proc_arg arg
;
1939 arg
.buffer
= buffer
;
1940 arg
.offset
= offset
;
1941 arg
.length
= length
;
1945 read_lock_bh(&rt6_lock
);
1946 fib6_clean_tree(&ip6_routing_table
, rt6_info_route
, 0, &arg
);
1947 read_unlock_bh(&rt6_lock
);
1951 *start
+= offset
% RT6_INFO_LEN
;
1953 arg
.len
-= offset
% RT6_INFO_LEN
;
1955 if (arg
.len
> length
)
/*
 * rt6_stats_seq_show - seq_file show routine for the rt6_stats proc file.
 *
 * Prints one line of seven %04x hex fields, in order: fib_nodes,
 * fib_route_nodes, fib_rt_alloc, fib_rt_entries, fib_rt_cache, the
 * current ip6_dst_ops.entries count, and fib_discarded_routes.
 * NOTE(review): the return statement is not visible in this extract.
 */
1963 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
1965 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
1966 rt6_stats
.fib_nodes
, rt6_stats
.fib_route_nodes
,
1967 rt6_stats
.fib_rt_alloc
, rt6_stats
.fib_rt_entries
,
1968 rt6_stats
.fib_rt_cache
,
1969 atomic_read(&ip6_dst_ops
.entries
),
1970 rt6_stats
.fib_discarded_routes
);
1975 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
1977 return single_open(file
, rt6_stats_seq_show
, NULL
);
/*
 * File operations for the rt6_stats proc entry (registered in
 * ip6_route_init()): opened via rt6_stats_seq_open(), seekable with
 * seq_lseek() and released with single_release().
 * NOTE(review): a .read member is not visible in this extract.
 */
1980 static struct file_operations rt6_stats_seq_fops
= {
1981 .owner
= THIS_MODULE
,
1982 .open
= rt6_stats_seq_open
,
1984 .llseek
= seq_lseek
,
1985 .release
= single_release
,
1987 #endif /* CONFIG_PROC_FS */
1989 #ifdef CONFIG_SYSCTL
/*
 * Backing store for the "flush" sysctl entry: proc_dointvec() stores the
 * written value here and ipv6_sysctl_rtcache_flush() then passes it to
 * fib6_run_gc().
 */
1991 static int flush_delay
;
/*
 * ipv6_sysctl_rtcache_flush - proc handler for the "flush" sysctl.
 *
 * Parses the integer into flush_delay with proc_dointvec(), then runs
 * fib6_run_gc() with that value, or with ~0UL when it is zero or negative.
 *
 * NOTE(review): proc_dointvec()'s result is not checked on the visible
 * line; any guard on @write and the return statements are not visible in
 * this extract.
 */
1994 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
, struct file
* filp
,
1995 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
1998 proc_dointvec(ctl
, write
, filp
, buffer
, lenp
, ppos
);
1999 fib6_run_gc(flush_delay
<= 0 ? ~0UL : (unsigned long)flush_delay
);
/*
 * sysctl table for the IPv6 routing tunables.  Each entry binds a
 * procname to one of the file-scope variables and to the proc/strategy
 * handlers used to read and write it:
 *   flush               -> flush_delay (ipv6_sysctl_rtcache_flush)
 *   gc_thresh           -> ip6_dst_ops.gc_thresh (plain int)
 *   max_size            -> ip6_rt_max_size (plain int)
 *   gc_min_interval     -> ip6_rt_gc_min_interval (jiffies)
 *   gc_timeout          -> ip6_rt_gc_timeout (jiffies)
 *   gc_interval         -> ip6_rt_gc_interval (jiffies)
 *   gc_elasticity       -> ip6_rt_gc_elasticity (jiffies handler)
 *   mtu_expires         -> ip6_rt_mtu_expires (jiffies)
 *   min_adv_mss         -> ip6_rt_min_advmss (jiffies handler)
 *   gc_min_interval_ms  -> ip6_rt_gc_min_interval (ms <-> jiffies)
 * NOTE(review): the .mode members, entry braces and table terminator are
 * not visible in this extract.
 */
2005 ctl_table ipv6_route_table
[] = {
2007 .ctl_name
= NET_IPV6_ROUTE_FLUSH
,
2008 .procname
= "flush",
2009 .data
= &flush_delay
,
2010 .maxlen
= sizeof(int),
2012 .proc_handler
= &ipv6_sysctl_rtcache_flush
2015 .ctl_name
= NET_IPV6_ROUTE_GC_THRESH
,
2016 .procname
= "gc_thresh",
2017 .data
= &ip6_dst_ops
.gc_thresh
,
2018 .maxlen
= sizeof(int),
2020 .proc_handler
= &proc_dointvec
,
2023 .ctl_name
= NET_IPV6_ROUTE_MAX_SIZE
,
2024 .procname
= "max_size",
2025 .data
= &ip6_rt_max_size
,
2026 .maxlen
= sizeof(int),
2028 .proc_handler
= &proc_dointvec
,
2031 .ctl_name
= NET_IPV6_ROUTE_GC_MIN_INTERVAL
,
2032 .procname
= "gc_min_interval",
2033 .data
= &ip6_rt_gc_min_interval
,
2034 .maxlen
= sizeof(int),
2036 .proc_handler
= &proc_dointvec_jiffies
,
2037 .strategy
= &sysctl_jiffies
,
2040 .ctl_name
= NET_IPV6_ROUTE_GC_TIMEOUT
,
2041 .procname
= "gc_timeout",
2042 .data
= &ip6_rt_gc_timeout
,
2043 .maxlen
= sizeof(int),
2045 .proc_handler
= &proc_dointvec_jiffies
,
2046 .strategy
= &sysctl_jiffies
,
2049 .ctl_name
= NET_IPV6_ROUTE_GC_INTERVAL
,
2050 .procname
= "gc_interval",
2051 .data
= &ip6_rt_gc_interval
,
2052 .maxlen
= sizeof(int),
2054 .proc_handler
= &proc_dointvec_jiffies
,
2055 .strategy
= &sysctl_jiffies
,
/*
 * NOTE(review): ip6_rt_gc_elasticity defaults to the plain number 9 (no
 * HZ scaling), yet it is exposed through the jiffies handlers, which
 * would scale values written from user space.  Looks like it should use
 * proc_dointvec -- confirm before changing.
 */
2058 .ctl_name
= NET_IPV6_ROUTE_GC_ELASTICITY
,
2059 .procname
= "gc_elasticity",
2060 .data
= &ip6_rt_gc_elasticity
,
2061 .maxlen
= sizeof(int),
2063 .proc_handler
= &proc_dointvec_jiffies
,
2064 .strategy
= &sysctl_jiffies
,
2067 .ctl_name
= NET_IPV6_ROUTE_MTU_EXPIRES
,
2068 .procname
= "mtu_expires",
2069 .data
= &ip6_rt_mtu_expires
,
2070 .maxlen
= sizeof(int),
2072 .proc_handler
= &proc_dointvec_jiffies
,
2073 .strategy
= &sysctl_jiffies
,
/*
 * NOTE(review): ip6_rt_min_advmss defaults to IPV6_MIN_MTU - 20 - 40, a
 * byte count rather than a time, yet it is exposed through the jiffies
 * handlers.  Looks like it should use proc_dointvec -- confirm before
 * changing.
 */
2076 .ctl_name
= NET_IPV6_ROUTE_MIN_ADVMSS
,
2077 .procname
= "min_adv_mss",
2078 .data
= &ip6_rt_min_advmss
,
2079 .maxlen
= sizeof(int),
2081 .proc_handler
= &proc_dointvec_jiffies
,
2082 .strategy
= &sysctl_jiffies
,
2085 .ctl_name
= NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS
,
2086 .procname
= "gc_min_interval_ms",
2087 .data
= &ip6_rt_gc_min_interval
,
2088 .maxlen
= sizeof(int),
2090 .proc_handler
= &proc_dointvec_ms_jiffies
,
2091 .strategy
= &sysctl_ms_jiffies
,
/*
 * ip6_route_init - boot-time setup of the IPv6 routing layer.
 *
 * Creates the "ip6_dst_cache" slab cache sized for struct rt6_info
 * (SLAB_HWCACHE_ALIGN) and panics if that fails.  Under CONFIG_PROC_FS it
 * registers the "ipv6_route" proc entry served by rt6_proc_info() (owned
 * by THIS_MODULE) and the "rt6_stats" entry served by rt6_stats_seq_fops.
 *
 * NOTE(review): the remaining kmem_cache_create() arguments, the check
 * guarding p->owner, the closing #endif and any further initialisation
 * calls are not visible in this extract.
 */
2098 void __init
ip6_route_init(void)
2100 struct proc_dir_entry
*p
;
2102 ip6_dst_ops
.kmem_cachep
= kmem_cache_create("ip6_dst_cache",
2103 sizeof(struct rt6_info
),
2104 0, SLAB_HWCACHE_ALIGN
,
2106 if (!ip6_dst_ops
.kmem_cachep
)
2107 panic("cannot create ip6_dst_cache");
2110 #ifdef CONFIG_PROC_FS
2111 p
= proc_net_create("ipv6_route", 0, rt6_proc_info
);
2113 p
->owner
= THIS_MODULE
;
2115 proc_net_fops_create("rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
/*
 * ip6_route_cleanup - undo ip6_route_init().
 *
 * Removes the "ipv6_route" and "rt6_stats" proc entries (under
 * CONFIG_PROC_FS) and destroys the ip6_dst_cache slab cache.
 * NOTE(review): the statements between the proc removal and the cache
 * destruction, including the closing #endif, are not visible in this
 * extract.
 */
2122 void ip6_route_cleanup(void)
2124 #ifdef CONFIG_PROC_FS
2125 proc_net_remove("ipv6_route");
2126 proc_net_remove("rt6_stats");
2133 kmem_cache_destroy(ip6_dst_ops
.kmem_cachep
);