/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque	<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Fixed routing subtrees.
 */
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <linux/rtnetlink.h>
#include <net/netevent.h>
#include <net/netlink.h>

#include <asm/uaccess.h>

#include <linux/sysctl.h>
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RT6_TRACE(x...) do { ; } while (0)
#endif

#define CLONE_OFFLINK_ROUTE 0
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
int ip6_rt_gc_interval = 30*HZ;
static int ip6_rt_gc_elasticity = 9;
static int ip6_rt_mtu_expires = 10*60*HZ;
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
                           struct net_device *dev, int how);
static int ip6_dst_gc(void);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
                                           struct in6_addr *gwaddr, int ifindex,
                                           unsigned pref);
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
                                           struct in6_addr *gwaddr, int ifindex);
#endif
static struct dst_ops ip6_dst_ops = {
    .protocol        = __constant_htons(ETH_P_IPV6),
    .check           = ip6_dst_check,
    .destroy         = ip6_dst_destroy,
    .ifdown          = ip6_dst_ifdown,
    .negative_advice = ip6_negative_advice,
    .link_failure    = ip6_link_failure,
    .update_pmtu     = ip6_rt_update_pmtu,
    .entry_size      = sizeof(struct rt6_info),
};

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
    .protocol    = __constant_htons(ETH_P_IPV6),
    .destroy     = ip6_dst_destroy,
    .check       = ip6_dst_check,
    .update_pmtu = ip6_rt_blackhole_update_pmtu,
    .entry_size  = sizeof(struct rt6_info),
};
struct rt6_info ip6_null_entry = {
    .u = {
        .dst = {
            .__refcnt = ATOMIC_INIT(1),
            .dev      = &loopback_dev,
            .error    = -ENETUNREACH,
            .metrics  = { [RTAX_HOPLIMIT - 1] = 255, },
            .input    = ip6_pkt_discard,
            .output   = ip6_pkt_discard_out,
            .path     = (struct dst_entry *)&ip6_null_entry,
        }
    },
    .rt6i_flags  = (RTF_REJECT | RTF_NONEXTHOP),
    .rt6i_metric = ~(u32) 0,
    .rt6i_ref    = ATOMIC_INIT(1),
};
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

struct rt6_info ip6_prohibit_entry = {
    .u = {
        .dst = {
            .__refcnt = ATOMIC_INIT(1),
            .dev      = &loopback_dev,
            .metrics  = { [RTAX_HOPLIMIT - 1] = 255, },
            .input    = ip6_pkt_prohibit,
            .output   = ip6_pkt_prohibit_out,
            .path     = (struct dst_entry *)&ip6_prohibit_entry,
        }
    },
    .rt6i_flags  = (RTF_REJECT | RTF_NONEXTHOP),
    .rt6i_metric = ~(u32) 0,
    .rt6i_ref    = ATOMIC_INIT(1),
};

struct rt6_info ip6_blk_hole_entry = {
    .u = {
        .dst = {
            .__refcnt = ATOMIC_INIT(1),
            .dev      = &loopback_dev,
            .metrics  = { [RTAX_HOPLIMIT - 1] = 255, },
            .input    = ip6_pkt_blk_hole,
            .output   = ip6_pkt_blk_hole,
            .path     = (struct dst_entry *)&ip6_blk_hole_entry,
        }
    },
    .rt6i_flags  = (RTF_REJECT | RTF_NONEXTHOP),
    .rt6i_metric = ~(u32) 0,
    .rt6i_ref    = ATOMIC_INIT(1),
};

#endif
/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
{
    return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
}

static void ip6_dst_destroy(struct dst_entry *dst)
{
    struct rt6_info *rt = (struct rt6_info *)dst;
    struct inet6_dev *idev = rt->rt6i_idev;

    if (idev != NULL) {
        rt->rt6i_idev = NULL;
        in6_dev_put(idev);
    }
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                           int how)
{
    struct rt6_info *rt = (struct rt6_info *)dst;
    struct inet6_dev *idev = rt->rt6i_idev;

    if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
        struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
        if (loopback_idev != NULL) {
            rt->rt6i_idev = loopback_idev;
            in6_dev_put(idev);
        }
    }
}
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
    return (rt->rt6i_flags & RTF_EXPIRES &&
            time_after(jiffies, rt->rt6i_expires));
}

static inline int rt6_need_strict(struct in6_addr *daddr)
{
    return (ipv6_addr_type(daddr) &
            (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
}
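/*
 * Illustrative note (not from the original source): rt6_need_strict()
 * is non-zero for destinations whose scope is tied to a specific link,
 * so the lookup has to honour the requested interface.  For example, a
 * multicast destination such as ff02::1 or a link-local destination
 * such as fe80::1 makes callers set RT6_LOOKUP_F_IFACE, while a global
 * destination like 2001:db8::1 does not.
 */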
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
                                                    int oif, int strict)
{
    struct rt6_info *local = NULL;
    struct rt6_info *sprt;

    if (oif) {
        for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
            struct net_device *dev = sprt->rt6i_dev;
            if (dev->ifindex == oif)
                return sprt;
            if (dev->flags & IFF_LOOPBACK) {
                if (sprt->rt6i_idev == NULL ||
                    sprt->rt6i_idev->dev->ifindex != oif) {
                    if (strict && oif)
                        continue;
                    if (local && (!oif ||
                        local->rt6i_idev->dev->ifindex == oif))
                        continue;
                }
                local = sprt;
            }
        }

        if (local)
            return local;

        if (strict)
            return &ip6_null_entry;
    }
    return rt;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
static void rt6_probe(struct rt6_info *rt)
{
    struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
    /*
     * Okay, this does not seem to be appropriate
     * for now, however, we need to check if it
     * is really so; aka Router Reachability Probing.
     *
     * Router Reachability Probe MUST be rate-limited
     * to no more than one per minute.
     */
    if (!neigh || (neigh->nud_state & NUD_VALID))
        return;
    read_lock_bh(&neigh->lock);
    if (!(neigh->nud_state & NUD_VALID) &&
        time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
        struct in6_addr mcaddr;
        struct in6_addr *target;

        neigh->updated = jiffies;
        read_unlock_bh(&neigh->lock);

        target = (struct in6_addr *)&neigh->primary_key;
        addrconf_addr_solict_mult(target, &mcaddr);
        ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
    } else
        read_unlock_bh(&neigh->lock);
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
    return;
}
#endif
/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
    struct net_device *dev = rt->rt6i_dev;
    if (!oif || dev->ifindex == oif)
        return 2;
    if ((dev->flags & IFF_LOOPBACK) &&
        rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
        return 1;
    return 0;
}

static inline int rt6_check_neigh(struct rt6_info *rt)
{
    struct neighbour *neigh = rt->rt6i_nexthop;
    int m = 0;
    if (rt->rt6i_flags & RTF_NONEXTHOP ||
        !(rt->rt6i_flags & RTF_GATEWAY))
        m = 1;
    else if (neigh) {
        read_lock_bh(&neigh->lock);
        if (neigh->nud_state & NUD_VALID)
            m = 2;
        else if (!(neigh->nud_state & NUD_FAILED))
            m = 1;
        read_unlock_bh(&neigh->lock);
    }
    return m;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
                           int strict)
{
    int m, n;

    m = rt6_check_dev(rt, oif);
    if (!m && (strict & RT6_LOOKUP_F_IFACE))
        return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
    m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
    n = rt6_check_neigh(rt);
    if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
        return -1;
    return m;
}
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
                                   int *mpri, struct rt6_info *match)
{
    int m;

    if (rt6_check_expired(rt))
        goto out;

    m = rt6_score_route(rt, oif, strict);
    if (m < 0)
        goto out;

    if (m > *mpri) {
        if (strict & RT6_LOOKUP_F_REACHABLE)
            rt6_probe(match);
        *mpri = m;
        match = rt;
    } else if (strict & RT6_LOOKUP_F_REACHABLE) {
        rt6_probe(rt);
    }

out:
    return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
                                     struct rt6_info *rr_head,
                                     u32 metric, int oif, int strict)
{
    struct rt6_info *rt, *match;
    int mpri = -1;

    match = NULL;
    for (rt = rr_head; rt && rt->rt6i_metric == metric;
         rt = rt->u.dst.rt6_next)
        match = find_match(rt, oif, strict, &mpri, match);
    for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
         rt = rt->u.dst.rt6_next)
        match = find_match(rt, oif, strict, &mpri, match);

    return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
    struct rt6_info *match, *rt0;

    RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
              __FUNCTION__, fn->leaf, oif);

    rt0 = fn->rr_ptr;
    if (!rt0)
        fn->rr_ptr = rt0 = fn->leaf;

    match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

    if (!match &&
        (strict & RT6_LOOKUP_F_REACHABLE)) {
        struct rt6_info *next = rt0->u.dst.rt6_next;

        /* no entries matched; do round-robin */
        if (!next || next->rt6i_metric != rt0->rt6i_metric)
            next = fn->leaf;

        if (next != rt0)
            fn->rr_ptr = next;
    }

    RT6_TRACE("%s() => %p\n",
              __FUNCTION__, match);

    return (match ? match : &ip6_null_entry);
}
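/*
 * Added explanatory sketch: rt6_score_route() builds a small integer
 * score per candidate.  The low bits come from rt6_check_dev()
 * (outgoing-interface match), the bits above them encode the RFC 4191
 * router preference when CONFIG_IPV6_ROUTER_PREF is enabled, and
 * rt6_check_neigh() vetoes candidates whose next hop is known bad when
 * RT6_LOOKUP_F_REACHABLE is requested.  find_rr_leaf() keeps the
 * best-scoring route among the entries sharing the lowest metric,
 * starting from fn->rr_ptr, so equally good routers end up being used
 * round-robin as described in the header comment.
 */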
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  struct in6_addr *gwaddr)
{
    struct route_info *rinfo = (struct route_info *) opt;
    struct in6_addr prefix_buf, *prefix;
    unsigned int pref;
    u32 lifetime;
    struct rt6_info *rt;

    if (len < sizeof(struct route_info)) {
        return -EINVAL;
    }

    /* Sanity check for prefix_len and length */
    if (rinfo->length > 3) {
        return -EINVAL;
    } else if (rinfo->prefix_len > 128) {
        return -EINVAL;
    } else if (rinfo->prefix_len > 64) {
        if (rinfo->length < 2) {
            return -EINVAL;
        }
    } else if (rinfo->prefix_len > 0) {
        if (rinfo->length < 1) {
            return -EINVAL;
        }
    }

    pref = rinfo->route_pref;
    if (pref == ICMPV6_ROUTER_PREF_INVALID)
        pref = ICMPV6_ROUTER_PREF_MEDIUM;

    lifetime = ntohl(rinfo->lifetime);
    if (lifetime == 0xffffffff) {
        /* infinity */
    } else if (lifetime > 0x7fffffff/HZ) {
        /* Avoid arithmetic overflow */
        lifetime = 0x7fffffff/HZ - 1;
    }

    if (rinfo->length == 3)
        prefix = (struct in6_addr *)rinfo->prefix;
    else {
        /* this function is safe */
        ipv6_addr_prefix(&prefix_buf,
                         (struct in6_addr *)rinfo->prefix,
                         rinfo->prefix_len);
        prefix = &prefix_buf;
    }

    rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

    if (rt && !lifetime) {
        ip6_del_rt(rt);
        rt = NULL;
    }

    if (!rt && lifetime)
        rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
                                pref);
    else if (rt)
        rt->rt6i_flags = RTF_ROUTEINFO |
                         (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

    if (rt) {
        if (lifetime == 0xffffffff) {
            rt->rt6i_flags &= ~RTF_EXPIRES;
        } else {
            rt->rt6i_expires = jiffies + HZ * lifetime;
            rt->rt6i_flags |= RTF_EXPIRES;
        }
        dst_release(&rt->u.dst);
    }
    return 0;
}
#endif
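/*
 * Worked example (added for clarity, values follow RFC 4191): a Route
 * Information option of length 3 (3 * 8 = 24 octets) carries the full
 * 128-bit prefix, so rinfo->prefix can be used directly; length 2 only
 * carries the first 64 bits, which is why prefix_len > 64 requires
 * length >= 2, and any non-zero prefix_len requires length >= 1.  A
 * lifetime of 0xffffffff means "no expiry", while finite lifetimes are
 * clamped below 0x7fffffff/HZ seconds so the jiffies arithmetic above
 * cannot overflow.
 */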
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
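/*
 * Added note on the macro above: when a lookup lands on ip6_null_entry,
 * BACKTRACK() walks back up the fib6 tree from the current node,
 * descending into a source-routed subtree (FIB6_SUBTREE) when one
 * exists, until it reaches a node carrying route information
 * (RTN_RTINFO) that can be retried, or gives up at the tree root
 * (RTN_TL_ROOT).  The callers below provide the "restart" and "out"
 * labels the macro jumps to.
 */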
static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
                                             struct flowi *fl, int flags)
{
    struct fib6_node *fn;
    struct rt6_info *rt;

    read_lock_bh(&table->tb6_lock);
    fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
    rt = fn->leaf;
    rt = rt6_device_match(rt, fl->oif, flags);
    BACKTRACK(&fl->fl6_src);
out:
    dst_hold(&rt->u.dst);
    read_unlock_bh(&table->tb6_lock);

    rt->u.dst.lastuse = jiffies;
    rt->u.dst.__use++;

    return rt;
}

struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
                            int oif, int strict)
{
    struct flowi fl = {
        .oif = oif,
        .nl_u = {
            .ip6_u = {
                .daddr = *daddr,
            },
        },
    };
    struct dst_entry *dst;
    int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

    if (saddr) {
        memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
        flags |= RT6_LOOKUP_F_HAS_SADDR;
    }

    dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
    if (dst->error == 0)
        return (struct rt6_info *) dst;

    dst_release(dst);

    return NULL;
}

EXPORT_SYMBOL(rt6_lookup);
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
    int err;
    struct fib6_table *table;

    table = rt->rt6i_table;
    write_lock_bh(&table->tb6_lock);
    err = fib6_add(&table->tb6_root, rt, info);
    write_unlock_bh(&table->tb6_lock);

    return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
    return __ip6_ins_rt(rt, NULL);
}
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
                                      struct in6_addr *saddr)
{
    struct rt6_info *rt;

    rt = ip6_rt_copy(ort);

    if (rt) {
        if (!(rt->rt6i_flags & RTF_GATEWAY)) {
            if (rt->rt6i_dst.plen != 128 &&
                ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
                rt->rt6i_flags |= RTF_ANYCAST;
            ipv6_addr_copy(&rt->rt6i_gateway, daddr);
        }

        ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
        rt->rt6i_dst.plen = 128;
        rt->rt6i_flags |= RTF_CACHE;
        rt->u.dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
        if (rt->rt6i_src.plen && saddr) {
            ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
            rt->rt6i_src.plen = 128;
        }
#endif

        rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
    }

    return rt;
}

static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
{
    struct rt6_info *rt = ip6_rt_copy(ort);
    if (rt) {
        ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
        rt->rt6i_dst.plen = 128;
        rt->rt6i_flags |= RTF_CACHE;
        rt->u.dst.flags |= DST_HOST;
        rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
    }
    return rt;
}
static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
                                            struct flowi *fl, int flags)
{
    struct fib6_node *fn;
    struct rt6_info *rt, *nrt;
    int strict = 0;
    int attempts = 3;
    int err;
    int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

    strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
    read_lock_bh(&table->tb6_lock);

restart_2:
    fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
    rt = rt6_select(fn, fl->iif, strict | reachable);
    BACKTRACK(&fl->fl6_src);
    if (rt == &ip6_null_entry ||
        rt->rt6i_flags & RTF_CACHE)
        goto out;

    dst_hold(&rt->u.dst);
    read_unlock_bh(&table->tb6_lock);

    if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
        nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
    else {
#if CLONE_OFFLINK_ROUTE
        nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
        goto out2;
#endif
    }

    dst_release(&rt->u.dst);
    rt = nrt ? : &ip6_null_entry;

    dst_hold(&rt->u.dst);
    if (nrt) {
        err = ip6_ins_rt(nrt);
        if (!err)
            goto out2;
    }

    if (--attempts <= 0)
        goto out2;

    /*
     * Race condition! In the gap, when table->tb6_lock was
     * released someone could insert this route.  Relookup.
     */
    dst_release(&rt->u.dst);
    goto relookup;

out:
    if (reachable) {
        reachable = 0;
        goto restart_2;
    }
    dst_hold(&rt->u.dst);
    read_unlock_bh(&table->tb6_lock);
out2:
    rt->u.dst.lastuse = jiffies;
    rt->u.dst.__use++;

    return rt;
}
void ip6_route_input(struct sk_buff *skb)
{
    struct ipv6hdr *iph = ipv6_hdr(skb);
    int flags = RT6_LOOKUP_F_HAS_SADDR;
    struct flowi fl = {
        .iif = skb->dev->ifindex,
        .nl_u = {
            .ip6_u = {
                .daddr = iph->daddr,
                .saddr = iph->saddr,
                .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
            },
        },
        .proto = iph->nexthdr,
    };

    if (rt6_need_strict(&iph->daddr))
        flags |= RT6_LOOKUP_F_IFACE;

    skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
}
static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
                                             struct flowi *fl, int flags)
{
    struct fib6_node *fn;
    struct rt6_info *rt, *nrt;
    int strict = 0;
    int attempts = 3;
    int err;
    int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

    strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
    read_lock_bh(&table->tb6_lock);

restart_2:
    fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
    rt = rt6_select(fn, fl->oif, strict | reachable);
    BACKTRACK(&fl->fl6_src);
    if (rt == &ip6_null_entry ||
        rt->rt6i_flags & RTF_CACHE)
        goto out;

    dst_hold(&rt->u.dst);
    read_unlock_bh(&table->tb6_lock);

    if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
        nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
    else {
#if CLONE_OFFLINK_ROUTE
        nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
        goto out2;
#endif
    }

    dst_release(&rt->u.dst);
    rt = nrt ? : &ip6_null_entry;

    dst_hold(&rt->u.dst);
    if (nrt) {
        err = ip6_ins_rt(nrt);
        if (!err)
            goto out2;
    }

    if (--attempts <= 0)
        goto out2;

    /*
     * Race condition! In the gap, when table->tb6_lock was
     * released someone could insert this route.  Relookup.
     */
    dst_release(&rt->u.dst);
    goto relookup;

out:
    if (reachable) {
        reachable = 0;
        goto restart_2;
    }
    dst_hold(&rt->u.dst);
    read_unlock_bh(&table->tb6_lock);
out2:
    rt->u.dst.lastuse = jiffies;
    rt->u.dst.__use++;
    return rt;
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
    int flags = 0;

    if (rt6_need_strict(&fl->fl6_dst))
        flags |= RT6_LOOKUP_F_IFACE;

    if (!ipv6_addr_any(&fl->fl6_src))
        flags |= RT6_LOOKUP_F_HAS_SADDR;

    return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);
static int ip6_blackhole_output(struct sk_buff *skb)
{
    kfree_skb(skb);
    return 0;
}

int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
{
    struct rt6_info *ort = (struct rt6_info *) *dstp;
    struct rt6_info *rt = (struct rt6_info *)
        dst_alloc(&ip6_dst_blackhole_ops);
    struct dst_entry *new = NULL;

    if (rt) {
        new = &rt->u.dst;

        atomic_set(&new->__refcnt, 1);
        new->input = ip6_blackhole_output;
        new->output = ip6_blackhole_output;

        memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
        new->dev = ort->u.dst.dev;
        if (new->dev)
            dev_hold(new->dev);
        rt->rt6i_idev = ort->rt6i_idev;
        if (rt->rt6i_idev)
            in6_dev_hold(rt->rt6i_idev);
        rt->rt6i_expires = 0;

        ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
        rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
        rt->rt6i_metric = 0;

        memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
        memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
    }

    dst_release(*dstp);
    *dstp = new;
    return (new ? 0 : -ENOMEM);
}
EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
/*
 *	Destination cache support functions
 */

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
    struct rt6_info *rt;

    rt = (struct rt6_info *) dst;

    if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
        return dst;

    return NULL;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
    struct rt6_info *rt = (struct rt6_info *) dst;

    if (rt) {
        if (rt->rt6i_flags & RTF_CACHE)
            ip6_del_rt(rt);
        else
            dst_release(dst);
    }
    return NULL;
}

static void ip6_link_failure(struct sk_buff *skb)
{
    struct rt6_info *rt;

    icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);

    rt = (struct rt6_info *) skb->dst;
    if (rt) {
        if (rt->rt6i_flags & RTF_CACHE) {
            dst_set_expires(&rt->u.dst, 0);
            rt->rt6i_flags |= RTF_EXPIRES;
        } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
            rt->rt6i_node->fn_sernum = -1;
    }
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
    struct rt6_info *rt6 = (struct rt6_info *)dst;

    if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
        rt6->rt6i_flags |= RTF_MODIFIED;
        if (mtu < IPV6_MIN_MTU) {
            mtu = IPV6_MIN_MTU;
            dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
        }
        dst->metrics[RTAX_MTU-1] = mtu;
        call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
    }
}

static int ipv6_get_mtu(struct net_device *dev);

static inline unsigned int ipv6_advmss(unsigned int mtu)
{
    mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

    if (mtu < ip6_rt_min_advmss)
        mtu = ip6_rt_min_advmss;

    /*
     * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
     * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
     * IPV6_MAXPLEN is also valid and means: "any MSS,
     * rely only on pmtu discovery"
     */
    if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
        mtu = IPV6_MAXPLEN;
    return mtu;
}
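/*
 * Worked example (added): with a typical Ethernet MTU of 1500,
 * ipv6_advmss() returns 1500 - 40 (IPv6 header) - 20 (TCP header) =
 * 1440.  The lower bound ip6_rt_min_advmss defaults to
 * IPV6_MIN_MTU - 20 - 40 = 1280 - 60 = 1220, and anything larger than
 * IPV6_MAXPLEN - 20 is reported as IPV6_MAXPLEN, i.e. "rely on path
 * MTU discovery".
 */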
static struct dst_entry *ndisc_dst_gc_list;
static DEFINE_SPINLOCK(ndisc_lock);

struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
                                  struct neighbour *neigh,
                                  struct in6_addr *addr,
                                  int (*output)(struct sk_buff *))
{
    struct rt6_info *rt;
    struct inet6_dev *idev = in6_dev_get(dev);

    if (unlikely(idev == NULL))
        return NULL;

    rt = ip6_dst_alloc();
    if (unlikely(rt == NULL)) {
        in6_dev_put(idev);
        goto out;
    }

    dev_hold(dev);
    if (neigh)
        neigh_hold(neigh);
    else
        neigh = ndisc_get_neigh(dev, addr);

    rt->rt6i_dev = dev;
    rt->rt6i_idev = idev;
    rt->rt6i_nexthop = neigh;
    atomic_set(&rt->u.dst.__refcnt, 1);
    rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
    rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
    rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
    rt->u.dst.output = output;

#if 0	/* there's no chance to use these for ndisc */
    rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
                          ? DST_HOST
                          : 0;
    ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
    rt->rt6i_dst.plen = 128;
#endif

    spin_lock_bh(&ndisc_lock);
    rt->u.dst.next = ndisc_dst_gc_list;
    ndisc_dst_gc_list = &rt->u.dst;
    spin_unlock_bh(&ndisc_lock);

    fib6_force_start_gc();

out:
    return &rt->u.dst;
}

int ndisc_dst_gc(int *more)
{
    struct dst_entry *dst, *next, **pprev;
    int freed;

    next = NULL;
    freed = 0;

    spin_lock_bh(&ndisc_lock);
    pprev = &ndisc_dst_gc_list;

    while ((dst = *pprev) != NULL) {
        if (!atomic_read(&dst->__refcnt)) {
            *pprev = dst->next;
            dst_free(dst);
            freed++;
        } else {
            pprev = &dst->next;
            (*more)++;
        }
    }

    spin_unlock_bh(&ndisc_lock);

    return freed;
}
static int ip6_dst_gc(void)
{
    static unsigned expire = 30*HZ;
    static unsigned long last_gc;
    unsigned long now = jiffies;

    if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
        atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
        goto out;

    expire++;
    fib6_run_gc(expire);
    last_gc = now;
    if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
        expire = ip6_rt_gc_timeout>>1;

out:
    expire -= expire>>ip6_rt_gc_elasticity;
    return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
}
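/*
 * Added note: the static "expire" value above adapts how aggressive
 * garbage collection is.  Each shortage makes fib6_run_gc() run with a
 * shorter expiry, expire is re-armed to ip6_rt_gc_timeout/2 once the
 * number of cached entries drops below gc_thresh, and the final
 * "expire -= expire >> ip6_rt_gc_elasticity" decay keeps it from
 * shrinking too fast (with the default elasticity of 9 it loses about
 * 1/512 of its value per call).
 */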
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

static int ipv6_get_mtu(struct net_device *dev)
{
    int mtu = IPV6_MIN_MTU;
    struct inet6_dev *idev;

    idev = in6_dev_get(dev);
    if (idev) {
        mtu = idev->cnf.mtu6;
        in6_dev_put(idev);
    }
    return mtu;
}

int ipv6_get_hoplimit(struct net_device *dev)
{
    int hoplimit = ipv6_devconf.hop_limit;
    struct inet6_dev *idev;

    idev = in6_dev_get(dev);
    if (idev) {
        hoplimit = idev->cnf.hop_limit;
        in6_dev_put(idev);
    }
    return hoplimit;
}
int ip6_route_add(struct fib6_config *cfg)
{
    int err;
    struct rt6_info *rt = NULL;
    struct net_device *dev = NULL;
    struct inet6_dev *idev = NULL;
    struct fib6_table *table;
    int addr_type;

    if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
        return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
    if (cfg->fc_src_len)
        return -EINVAL;
#endif
    if (cfg->fc_ifindex) {
        err = -ENODEV;
        dev = dev_get_by_index(cfg->fc_ifindex);
        if (!dev)
            goto out;
        idev = in6_dev_get(dev);
        if (!idev)
            goto out;
    }

    if (cfg->fc_metric == 0)
        cfg->fc_metric = IP6_RT_PRIO_USER;

    err = -ENOBUFS;
    table = fib6_new_table(cfg->fc_table);
    if (table == NULL) {
        goto out;
    }

    rt = ip6_dst_alloc();

    if (rt == NULL) {
        err = -ENOMEM;
        goto out;
    }

    rt->u.dst.obsolete = -1;
    rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);

    if (cfg->fc_protocol == RTPROT_UNSPEC)
        cfg->fc_protocol = RTPROT_BOOT;
    rt->rt6i_protocol = cfg->fc_protocol;

    addr_type = ipv6_addr_type(&cfg->fc_dst);

    if (addr_type & IPV6_ADDR_MULTICAST)
        rt->u.dst.input = ip6_mc_input;
    else
        rt->u.dst.input = ip6_forward;

    rt->u.dst.output = ip6_output;

    ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
    rt->rt6i_dst.plen = cfg->fc_dst_len;
    if (rt->rt6i_dst.plen == 128)
        rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
    ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
    rt->rt6i_src.plen = cfg->fc_src_len;
#endif

    rt->rt6i_metric = cfg->fc_metric;

    /* We cannot add true routes via loopback here,
       they would result in kernel looping; promote them to reject routes
     */
    if ((cfg->fc_flags & RTF_REJECT) ||
        (dev && (dev->flags & IFF_LOOPBACK) && !(addr_type & IPV6_ADDR_LOOPBACK))) {
        /* hold loopback dev/idev if we haven't done so. */
        if (dev != &loopback_dev) {
            if (dev) {
                dev_put(dev);
                in6_dev_put(idev);
            }
            dev = &loopback_dev;
            dev_hold(dev);
            idev = in6_dev_get(dev);
            if (!idev) {
                err = -ENODEV;
                goto out;
            }
        }
        rt->u.dst.output = ip6_pkt_discard_out;
        rt->u.dst.input = ip6_pkt_discard;
        rt->u.dst.error = -ENETUNREACH;
        rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
        goto install_route;
    }

    if (cfg->fc_flags & RTF_GATEWAY) {
        struct in6_addr *gw_addr;
        int gwa_type;

        gw_addr = &cfg->fc_gateway;
        ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
        gwa_type = ipv6_addr_type(gw_addr);

        if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
            struct rt6_info *grt;

            /* IPv6 strictly inhibits using not link-local
               addresses as nexthop address.
               Otherwise, router will not able to send redirects.
               It is very good, but in some (rare!) circumstances
               (SIT, PtP, NBMA NOARP links) it is handy to allow
               some exceptions. --ANK
             */
            err = -EINVAL;
            if (!(gwa_type & IPV6_ADDR_UNICAST))
                goto out;

            grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);

            err = -EHOSTUNREACH;
            if (grt == NULL)
                goto out;
            if (dev) {
                if (dev != grt->rt6i_dev) {
                    dst_release(&grt->u.dst);
                    goto out;
                }
            } else {
                dev = grt->rt6i_dev;
                idev = grt->rt6i_idev;
                dev_hold(dev);
                in6_dev_hold(grt->rt6i_idev);
            }
            if (!(grt->rt6i_flags & RTF_GATEWAY))
                err = 0;
            dst_release(&grt->u.dst);

            if (err)
                goto out;
        }
        err = -EINVAL;
        if (dev == NULL || (dev->flags & IFF_LOOPBACK))
            goto out;
    }

    err = -ENODEV;
    if (dev == NULL)
        goto out;

    if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
        rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
        if (IS_ERR(rt->rt6i_nexthop)) {
            err = PTR_ERR(rt->rt6i_nexthop);
            rt->rt6i_nexthop = NULL;
            goto out;
        }
    }

    rt->rt6i_flags = cfg->fc_flags;

install_route:
    if (cfg->fc_mx) {
        struct nlattr *nla;
        int remaining;

        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
            int type = nla->nla_type;

            if (type) {
                if (type > RTAX_MAX) {
                    err = -EINVAL;
                    goto out;
                }

                rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
            }
        }
    }

    if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
        rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
    if (!rt->u.dst.metrics[RTAX_MTU-1])
        rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
    if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
        rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
    rt->u.dst.dev = dev;
    rt->rt6i_idev = idev;
    rt->rt6i_table = table;
    return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
    if (dev)
        dev_put(dev);
    if (idev)
        in6_dev_put(idev);
    if (rt)
        dst_free(&rt->u.dst);
    return err;
}
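/*
 * Added usage sketch (hypothetical values, shown only to map the
 * userspace view onto struct fib6_config): a request equivalent to
 * "ip -6 route add 2001:db8::/32 via fe80::1 dev eth0 metric 1024"
 * reaches ip6_route_add() with fc_dst = 2001:db8::, fc_dst_len = 32,
 * fc_gateway = fe80::1 (so RTF_GATEWAY is set), fc_ifindex = the
 * ifindex of eth0 and fc_metric = 1024; with fc_metric == 0 the route
 * would get IP6_RT_PRIO_USER instead.
 */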
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
    int err;
    struct fib6_table *table;

    if (rt == &ip6_null_entry)
        return -ENOENT;

    table = rt->rt6i_table;
    write_lock_bh(&table->tb6_lock);

    err = fib6_del(rt, info);
    dst_release(&rt->u.dst);

    write_unlock_bh(&table->tb6_lock);

    return err;
}

int ip6_del_rt(struct rt6_info *rt)
{
    return __ip6_del_rt(rt, NULL);
}
static int ip6_route_del(struct fib6_config *cfg)
{
    struct fib6_table *table;
    struct fib6_node *fn;
    struct rt6_info *rt;
    int err = -ESRCH;

    table = fib6_get_table(cfg->fc_table);
    if (table == NULL)
        return err;

    read_lock_bh(&table->tb6_lock);

    fn = fib6_locate(&table->tb6_root,
                     &cfg->fc_dst, cfg->fc_dst_len,
                     &cfg->fc_src, cfg->fc_src_len);

    if (fn) {
        for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
            if (cfg->fc_ifindex &&
                (rt->rt6i_dev == NULL ||
                 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
                continue;
            if (cfg->fc_flags & RTF_GATEWAY &&
                !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
                continue;
            if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
                continue;
            dst_hold(&rt->u.dst);
            read_unlock_bh(&table->tb6_lock);

            return __ip6_del_rt(rt, &cfg->fc_nlinfo);
        }
    }
    read_unlock_bh(&table->tb6_lock);

    return err;
}
struct ip6rd_flowi {
    struct flowi fl;
    struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
                                             struct flowi *fl,
                                             int flags)
{
    struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
    struct rt6_info *rt;
    struct fib6_node *fn;

    /*
     * Get the "current" route for this destination and
     * check if the redirect has come from approriate router.
     *
     * RFC 2461 specifies that redirects should only be
     * accepted if they come from the nexthop to the target.
     * Due to the way the routes are chosen, this notion
     * is a bit fuzzy and one might need to check all possible
     * routes.
     */

    read_lock_bh(&table->tb6_lock);
    fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
    for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
        /*
         * Current route is on-link; redirect is always invalid.
         *
         * Seems, previous statement is not true. It could
         * be node, which looks for us as on-link (f.e. proxy ndisc)
         * But then router serving it might decide, that we should
         * know truth 8)8) --ANK (980726).
         */
        if (rt6_check_expired(rt))
            continue;
        if (!(rt->rt6i_flags & RTF_GATEWAY))
            continue;
        if (fl->oif != rt->rt6i_dev->ifindex)
            continue;
        if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
            continue;
        break;
    }

    if (!rt)
        rt = &ip6_null_entry;
    BACKTRACK(&fl->fl6_src);
out:
    dst_hold(&rt->u.dst);

    read_unlock_bh(&table->tb6_lock);

    return rt;
}

static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
                                           struct in6_addr *src,
                                           struct in6_addr *gateway,
                                           struct net_device *dev)
{
    int flags = RT6_LOOKUP_F_HAS_SADDR;
    struct ip6rd_flowi rdfl = {
        .fl = {
            .oif = dev->ifindex,
            .nl_u = {
                .ip6_u = {
                    .daddr = *dest,
                    .saddr = *src,
                },
            },
        },
        .gateway = *gateway,
    };

    if (rt6_need_strict(dest))
        flags |= RT6_LOOKUP_F_IFACE;

    return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
}
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
                  struct in6_addr *saddr,
                  struct neighbour *neigh, u8 *lladdr, int on_link)
{
    struct rt6_info *rt, *nrt = NULL;
    struct netevent_redirect netevent;

    rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

    if (rt == &ip6_null_entry) {
        if (net_ratelimit())
            printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
                   "for redirect target\n");
        goto out;
    }

    /*
     *	We have finally decided to accept it.
     */

    neigh_update(neigh, lladdr, NUD_STALE,
                 NEIGH_UPDATE_F_WEAK_OVERRIDE|
                 NEIGH_UPDATE_F_OVERRIDE|
                 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
                                 NEIGH_UPDATE_F_ISROUTER))
                 );

    /*
     * Redirect received -> path was valid.
     * Look, redirects are sent only in response to data packets,
     * so that this nexthop apparently is reachable. --ANK
     */
    dst_confirm(&rt->u.dst);

    /* Duplicate redirect: silently ignore. */
    if (neigh == rt->u.dst.neighbour)
        goto out;

    nrt = ip6_rt_copy(rt);
    if (nrt == NULL)
        goto out;

    nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
    if (on_link)
        nrt->rt6i_flags &= ~RTF_GATEWAY;

    ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
    nrt->rt6i_dst.plen = 128;
    nrt->u.dst.flags |= DST_HOST;

    ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr *)neigh->primary_key);
    nrt->rt6i_nexthop = neigh_clone(neigh);
    /* Reset pmtu, it may be better */
    nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
    nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

    if (ip6_ins_rt(nrt))
        goto out;

    netevent.old = &rt->u.dst;
    netevent.new = &nrt->u.dst;
    call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

    if (rt->rt6i_flags & RTF_CACHE) {
        ip6_del_rt(rt);
        return;
    }

out:
    dst_release(&rt->u.dst);
    return;
}
/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
                        struct net_device *dev, u32 pmtu)
{
    struct rt6_info *rt, *nrt;
    int allfrag = 0;

    rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
    if (rt == NULL)
        return;

    if (pmtu >= dst_mtu(&rt->u.dst))
        goto out;

    if (pmtu < IPV6_MIN_MTU) {
        /*
         * According to RFC2460, PMTU is set to the IPv6 Minimum Link
         * MTU (1280) and a fragment header should always be included
         * after a node receiving Too Big message reporting PMTU is
         * less than the IPv6 Minimum Link MTU.
         */
        pmtu = IPV6_MIN_MTU;
        allfrag = 1;
    }

    /* New mtu received -> path was valid.
       They are sent only in response to data packets,
       so that this nexthop apparently is reachable. --ANK
     */
    dst_confirm(&rt->u.dst);

    /* Host route. If it is static, it would be better
       not to override it, but add new one, so that
       when cache entry will expire old pmtu
       would return automatically.
     */
    if (rt->rt6i_flags & RTF_CACHE) {
        rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
        if (allfrag)
            rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
        dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
        rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
        goto out;
    }

    /* Network route.
       Two cases are possible:
       1. It is connected route. Action: COW
       2. It is gatewayed route or NONEXTHOP route. Action: clone it.
     */
    if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
        nrt = rt6_alloc_cow(rt, daddr, saddr);
    else
        nrt = rt6_alloc_clone(rt, daddr);

    if (nrt) {
        nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
        if (allfrag)
            nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;

        /* According to RFC 1981, detecting PMTU increase shouldn't be
         * happened within 5 mins, the recommended timer is 10 mins.
         * Here this route expiration time is set to ip6_rt_mtu_expires
         * which is 10 mins. After 10 mins the decreased pmtu is expired
         * and detecting PMTU increase will be automatically happened.
         */
        dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
        nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

        ip6_ins_rt(nrt);
    }
out:
    dst_release(&rt->u.dst);
}
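/*
 * Worked example (added): if a Packet Too Big message reports an MTU
 * of 1400 for a destination currently using 1500, a host route for
 * that destination is cloned (or its cached copy updated) with
 * RTAX_MTU = 1400 and an expiry of ip6_rt_mtu_expires (10 minutes by
 * default), after which the larger MTU may be probed again.  A
 * reported MTU below 1280 keeps the MTU at IPV6_MIN_MTU and sets
 * RTAX_FEATURE_ALLFRAG so a fragment header is always included.
 */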
/*
 *	Misc support functions
 */

static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
    struct rt6_info *rt = ip6_dst_alloc();

    if (rt) {
        rt->u.dst.input = ort->u.dst.input;
        rt->u.dst.output = ort->u.dst.output;

        memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
        rt->u.dst.error = ort->u.dst.error;
        rt->u.dst.dev = ort->u.dst.dev;
        if (rt->u.dst.dev)
            dev_hold(rt->u.dst.dev);
        rt->rt6i_idev = ort->rt6i_idev;
        if (rt->rt6i_idev)
            in6_dev_hold(rt->rt6i_idev);
        rt->u.dst.lastuse = jiffies;
        rt->rt6i_expires = 0;

        ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
        rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
        rt->rt6i_metric = 0;

        memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
        memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
        rt->rt6i_table = ort->rt6i_table;
    }
    return rt;
}
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
                                           struct in6_addr *gwaddr, int ifindex)
{
    struct fib6_node *fn;
    struct rt6_info *rt = NULL;
    struct fib6_table *table;

    table = fib6_get_table(RT6_TABLE_INFO);
    if (table == NULL)
        return NULL;

    write_lock_bh(&table->tb6_lock);
    fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
    if (!fn)
        goto out;

    for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
        if (rt->rt6i_dev->ifindex != ifindex)
            continue;
        if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
            continue;
        if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
            continue;
        dst_hold(&rt->u.dst);
        break;
    }
out:
    write_unlock_bh(&table->tb6_lock);
    return rt;
}

static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
                                           struct in6_addr *gwaddr, int ifindex,
                                           unsigned pref)
{
    struct fib6_config cfg = {
        .fc_table   = RT6_TABLE_INFO,
        .fc_ifindex = ifindex,
        .fc_dst_len = prefixlen,
        .fc_flags   = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
                      RTF_UP | RTF_PREF(pref),
    };

    ipv6_addr_copy(&cfg.fc_dst, prefix);
    ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

    /* We should treat it as a default route if prefix length is 0. */
    if (!prefixlen)
        cfg.fc_flags |= RTF_DEFAULT;

    ip6_route_add(&cfg);

    return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
}
#endif
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
    struct rt6_info *rt;
    struct fib6_table *table;

    table = fib6_get_table(RT6_TABLE_DFLT);
    if (table == NULL)
        return NULL;

    write_lock_bh(&table->tb6_lock);
    for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
        if (dev == rt->rt6i_dev &&
            ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
            ipv6_addr_equal(&rt->rt6i_gateway, addr))
            break;
    }
    if (rt)
        dst_hold(&rt->u.dst);
    write_unlock_bh(&table->tb6_lock);
    return rt;
}

struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
                                     struct net_device *dev,
                                     unsigned int pref)
{
    struct fib6_config cfg = {
        .fc_table   = RT6_TABLE_DFLT,
        .fc_ifindex = dev->ifindex,
        .fc_flags   = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
                      RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
    };

    ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

    ip6_route_add(&cfg);

    return rt6_get_dflt_router(gwaddr, dev);
}

void rt6_purge_dflt_routers(void)
{
    struct rt6_info *rt;
    struct fib6_table *table;

    /* NOTE: Keep consistent with rt6_get_dflt_router */
    table = fib6_get_table(RT6_TABLE_DFLT);
    if (table == NULL)
        return;

restart:
    read_lock_bh(&table->tb6_lock);
    for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
        if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
            dst_hold(&rt->u.dst);
            read_unlock_bh(&table->tb6_lock);
            ip6_del_rt(rt);
            goto restart;
        }
    }
    read_unlock_bh(&table->tb6_lock);
}
static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
                                 struct fib6_config *cfg)
{
    memset(cfg, 0, sizeof(*cfg));

    cfg->fc_table = RT6_TABLE_MAIN;
    cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
    cfg->fc_metric = rtmsg->rtmsg_metric;
    cfg->fc_expires = rtmsg->rtmsg_info;
    cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
    cfg->fc_src_len = rtmsg->rtmsg_src_len;
    cfg->fc_flags = rtmsg->rtmsg_flags;

    ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
    ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
    ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}
int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
{
    struct fib6_config cfg;
    struct in6_rtmsg rtmsg;
    int err;

    switch(cmd) {
    case SIOCADDRT:		/* Add a route */
    case SIOCDELRT:		/* Delete a route */
        if (!capable(CAP_NET_ADMIN))
            return -EPERM;
        err = copy_from_user(&rtmsg, arg,
                             sizeof(struct in6_rtmsg));
        if (err)
            return -EFAULT;

        rtmsg_to_fib6_config(&rtmsg, &cfg);

        rtnl_lock();
        switch (cmd) {
        case SIOCADDRT:
            err = ip6_route_add(&cfg);
            break;
        case SIOCDELRT:
            err = ip6_route_del(&cfg);
            break;
        default:
            err = -EINVAL;
        }
        rtnl_unlock();

        return err;
    }

    return -EINVAL;
}
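/*
 * Added note (assumption about userspace callers): SIOCADDRT and
 * SIOCDELRT are the legacy ioctl entry points used by tools such as
 * route(8); they are translated into the same struct fib6_config that
 * the rtnetlink handlers further below feed into ip6_route_add() and
 * ip6_route_del().
 */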
/*
 *	Drop the packet on the floor
 */

static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
                               int ipstats_mib_noroutes)
{
    int type;
    switch (ipstats_mib_noroutes) {
    case IPSTATS_MIB_INNOROUTES:
        type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
        if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
            IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
            break;
        }
        /* FALLTHROUGH */
    case IPSTATS_MIB_OUTNOROUTES:
        IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
        break;
    }
    icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
    kfree_skb(skb);
    return 0;
}

static int ip6_pkt_discard(struct sk_buff *skb)
{
    return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_discard_out(struct sk_buff *skb)
{
    skb->dev = skb->dst->dev;
    return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb)
{
    return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
    skb->dev = skb->dst->dev;
    return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
    kfree_skb(skb);
    return 0;
}

#endif
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
                                    const struct in6_addr *addr,
                                    int anycast)
{
    struct rt6_info *rt = ip6_dst_alloc();

    if (rt == NULL)
        return ERR_PTR(-ENOMEM);

    dev_hold(&loopback_dev);
    in6_dev_hold(idev);

    rt->u.dst.flags = DST_HOST;
    rt->u.dst.input = ip6_input;
    rt->u.dst.output = ip6_output;
    rt->rt6i_dev = &loopback_dev;
    rt->rt6i_idev = idev;
    rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
    rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
    rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
    rt->u.dst.obsolete = -1;

    rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
    if (anycast)
        rt->rt6i_flags |= RTF_ANYCAST;
    else
        rt->rt6i_flags |= RTF_LOCAL;
    rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
    if (rt->rt6i_nexthop == NULL) {
        dst_free(&rt->u.dst);
        return ERR_PTR(-ENOMEM);
    }

    ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
    rt->rt6i_dst.plen = 128;
    rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);

    atomic_set(&rt->u.dst.__refcnt, 1);

    return rt;
}
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
    if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
        rt != &ip6_null_entry) {
        RT6_TRACE("deleted by ifdown %p\n", rt);
        return -1;
    }
    return 0;
}

void rt6_ifdown(struct net_device *dev)
{
    fib6_clean_all(fib6_ifdown, 0, dev);
}

struct rt6_mtu_change_arg
{
    struct net_device *dev;
    unsigned mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
    struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
    struct inet6_dev *idev;

    /* In IPv6 pmtu discovery is not optional,
       so that RTAX_MTU lock cannot disable it.
       We still use this lock to block changes
       caused by addrconf/ndisc.
    */

    idev = __in6_dev_get(arg->dev);
    if (idev == NULL)
        return 0;

    /* For administrative MTU increase, there is no way to discover
       IPv6 PMTU increase, so PMTU increase should be updated here.
       Since RFC 1981 doesn't include administrative MTU increase
       update PMTU increase is a MUST. (i.e. jumbo frame)

       If new MTU is less than route PMTU, this new MTU will be the
       lowest MTU in the path, update the route PMTU to reflect PMTU
       decreases; if new MTU is greater than route PMTU, and the
       old MTU is the lowest MTU in the path, update the route PMTU
       to reflect the increase. In this case if the other nodes' MTU
       also have the lowest MTU, TOO BIG MESSAGE will be lead to
       PMTU discovery.
     */
    if (rt->rt6i_dev == arg->dev &&
        !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
        (dst_mtu(&rt->u.dst) > arg->mtu ||
         (dst_mtu(&rt->u.dst) < arg->mtu &&
          dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
        rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
        rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
    }
    return 0;
}

void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
    struct rt6_mtu_change_arg arg = {
        .dev = dev,
        .mtu = mtu,
    };

    fib6_clean_all(rt6_mtu_change_route, 0, &arg);
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
    [RTA_GATEWAY]  = { .len = sizeof(struct in6_addr) },
    [RTA_OIF]      = { .type = NLA_U32 },
    [RTA_IIF]      = { .type = NLA_U32 },
    [RTA_PRIORITY] = { .type = NLA_U32 },
    [RTA_METRICS]  = { .type = NLA_NESTED },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
                              struct fib6_config *cfg)
{
    struct rtmsg *rtm;
    struct nlattr *tb[RTA_MAX+1];
    int err;

    err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
    if (err < 0)
        goto errout;

    err = -EINVAL;
    rtm = nlmsg_data(nlh);
    memset(cfg, 0, sizeof(*cfg));

    cfg->fc_table = rtm->rtm_table;
    cfg->fc_dst_len = rtm->rtm_dst_len;
    cfg->fc_src_len = rtm->rtm_src_len;
    cfg->fc_flags = RTF_UP;
    cfg->fc_protocol = rtm->rtm_protocol;

    if (rtm->rtm_type == RTN_UNREACHABLE)
        cfg->fc_flags |= RTF_REJECT;

    cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
    cfg->fc_nlinfo.nlh = nlh;

    if (tb[RTA_GATEWAY]) {
        nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
        cfg->fc_flags |= RTF_GATEWAY;
    }

    if (tb[RTA_DST]) {
        int plen = (rtm->rtm_dst_len + 7) >> 3;

        if (nla_len(tb[RTA_DST]) < plen)
            goto errout;

        nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
    }

    if (tb[RTA_SRC]) {
        int plen = (rtm->rtm_src_len + 7) >> 3;

        if (nla_len(tb[RTA_SRC]) < plen)
            goto errout;

        nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
    }

    if (tb[RTA_OIF])
        cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

    if (tb[RTA_PRIORITY])
        cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

    if (tb[RTA_METRICS]) {
        cfg->fc_mx = nla_data(tb[RTA_METRICS]);
        cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
    }

    if (tb[RTA_TABLE])
        cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

    err = 0;
errout:
    return err;
}
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
    struct fib6_config cfg;
    int err;

    err = rtm_to_fib6_config(skb, nlh, &cfg);
    if (err < 0)
        return err;

    return ip6_route_del(&cfg);
}

static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
    struct fib6_config cfg;
    int err;

    err = rtm_to_fib6_config(skb, nlh, &cfg);
    if (err < 0)
        return err;

    return ip6_route_add(&cfg);
}
static inline size_t rt6_nlmsg_size(void)
{
    return NLMSG_ALIGN(sizeof(struct rtmsg))
           + nla_total_size(16) /* RTA_SRC */
           + nla_total_size(16) /* RTA_DST */
           + nla_total_size(16) /* RTA_GATEWAY */
           + nla_total_size(16) /* RTA_PREFSRC */
           + nla_total_size(4) /* RTA_TABLE */
           + nla_total_size(4) /* RTA_IIF */
           + nla_total_size(4) /* RTA_OIF */
           + nla_total_size(4) /* RTA_PRIORITY */
           + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
           + nla_total_size(sizeof(struct rta_cacheinfo));
}
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 pid, u32 seq,
                         int prefix, unsigned int flags)
{
    struct rtmsg *rtm;
    struct nlmsghdr *nlh;
    long expires;
    u32 table;

    if (prefix) {	/* user wants prefix routes only */
        if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
            /* success since this is not a prefix route */
            return 1;
        }
    }

    nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
    if (nlh == NULL)
        return -EMSGSIZE;

    rtm = nlmsg_data(nlh);
    rtm->rtm_family = AF_INET6;
    rtm->rtm_dst_len = rt->rt6i_dst.plen;
    rtm->rtm_src_len = rt->rt6i_src.plen;
    rtm->rtm_flags = 0;
    if (rt->rt6i_table)
        table = rt->rt6i_table->tb6_id;
    else
        table = RT6_TABLE_UNSPEC;
    rtm->rtm_table = table;
    NLA_PUT_U32(skb, RTA_TABLE, table);
    if (rt->rt6i_flags & RTF_REJECT)
        rtm->rtm_type = RTN_UNREACHABLE;
    else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
        rtm->rtm_type = RTN_LOCAL;
    else
        rtm->rtm_type = RTN_UNICAST;

    rtm->rtm_scope = RT_SCOPE_UNIVERSE;
    rtm->rtm_protocol = rt->rt6i_protocol;
    if (rt->rt6i_flags & RTF_DYNAMIC)
        rtm->rtm_protocol = RTPROT_REDIRECT;
    else if (rt->rt6i_flags & RTF_ADDRCONF)
        rtm->rtm_protocol = RTPROT_KERNEL;
    else if (rt->rt6i_flags & RTF_DEFAULT)
        rtm->rtm_protocol = RTPROT_RA;

    if (rt->rt6i_flags & RTF_CACHE)
        rtm->rtm_flags |= RTM_F_CLONED;

    if (dst) {
        NLA_PUT(skb, RTA_DST, 16, dst);
        rtm->rtm_dst_len = 128;
    } else if (rtm->rtm_dst_len)
        NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
    if (src) {
        NLA_PUT(skb, RTA_SRC, 16, src);
        rtm->rtm_src_len = 128;
    } else if (rtm->rtm_src_len)
        NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
    if (iif)
        NLA_PUT_U32(skb, RTA_IIF, iif);
    else if (dst) {
        struct in6_addr saddr_buf;
        if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
            NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
    }

    if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
        goto nla_put_failure;

    if (rt->u.dst.neighbour)
        NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);

    if (rt->u.dst.dev)
        NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

    NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

    expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
    if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
                           expires, rt->u.dst.error) < 0)
        goto nla_put_failure;

    return nlmsg_end(skb, nlh);

nla_put_failure:
    nlmsg_cancel(skb, nlh);
    return -EMSGSIZE;
}
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
    struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
    int prefix;

    if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
        struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
        prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
    } else
        prefix = 0;

    return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
                         NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
                         prefix, NLM_F_MULTI);
}
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
    struct nlattr *tb[RTA_MAX+1];
    struct rt6_info *rt;
    struct sk_buff *skb;
    struct rtmsg *rtm;
    struct flowi fl;
    int err, iif = 0;

    err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
    if (err < 0)
        goto errout;

    err = -EINVAL;
    memset(&fl, 0, sizeof(fl));

    if (tb[RTA_SRC]) {
        if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
            goto errout;

        ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
    }

    if (tb[RTA_DST]) {
        if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
            goto errout;

        ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
    }

    if (tb[RTA_IIF])
        iif = nla_get_u32(tb[RTA_IIF]);

    if (tb[RTA_OIF])
        fl.oif = nla_get_u32(tb[RTA_OIF]);

    if (iif) {
        struct net_device *dev;
        dev = __dev_get_by_index(iif);
        if (!dev) {
            err = -ENODEV;
            goto errout;
        }
    }

    skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
    if (skb == NULL) {
        err = -ENOBUFS;
        goto errout;
    }

    /* Reserve room for dummy headers, this skb can pass
       through good chunk of routing engine.
     */
    skb_reset_mac_header(skb);
    skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

    rt = (struct rt6_info *) ip6_route_output(NULL, &fl);
    skb->dst = &rt->u.dst;

    err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
                        RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
                        nlh->nlmsg_seq, 0, 0);
    if (err < 0) {
        kfree_skb(skb);
        goto errout;
    }

    err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
errout:
    return err;
}
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
    struct sk_buff *skb;
    u32 pid = 0, seq = 0;
    struct nlmsghdr *nlh = NULL;
    int err = -ENOBUFS;

    if (info) {
        pid = info->pid;
        nlh = info->nlh;
        if (nlh)
            seq = nlh->nlmsg_seq;
    }

    skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
    if (skb == NULL)
        goto errout;

    err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
    if (err < 0) {
        /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
        WARN_ON(err == -EMSGSIZE);
        kfree_skb(skb);
        goto errout;
    }

    err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
errout:
    if (err < 0)
        rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
}
#ifdef CONFIG_PROC_FS

#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
    char *buffer;
    int offset;
    int length;
    int skip;
    int len;
};

static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
    struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;

    if (arg->skip < arg->offset / RT6_INFO_LEN) {
        arg->skip++;
        return 0;
    }

    if (arg->len >= arg->length)
        return 0;

    arg->len += sprintf(arg->buffer + arg->len,
                        NIP6_SEQFMT " %02x ",
                        NIP6(rt->rt6i_dst.addr),
                        rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
    arg->len += sprintf(arg->buffer + arg->len,
                        NIP6_SEQFMT " %02x ",
                        NIP6(rt->rt6i_src.addr),
                        rt->rt6i_src.plen);
#else
    arg->len += sprintf(arg->buffer + arg->len,
                        "00000000000000000000000000000000 00 ");
#endif

    if (rt->rt6i_nexthop) {
        arg->len += sprintf(arg->buffer + arg->len,
                            NIP6_SEQFMT,
                            NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
    } else {
        arg->len += sprintf(arg->buffer + arg->len,
                            "00000000000000000000000000000000");
    }
    arg->len += sprintf(arg->buffer + arg->len,
                        " %08x %08x %08x %08x %8s\n",
                        rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
                        rt->u.dst.__use, rt->rt6i_flags,
                        rt->rt6i_dev ? rt->rt6i_dev->name : "");
    return 0;
}

static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
    struct rt6_proc_arg arg = {
        .buffer = buffer,
        .offset = offset,
        .length = length,
    };

    fib6_clean_all(rt6_info_route, 0, &arg);

    *start = buffer;
    if (offset)
        *start += offset % RT6_INFO_LEN;

    arg.len -= offset % RT6_INFO_LEN;

    if (arg.len > length)
        arg.len = length;
    if (arg.len < 0)
        arg.len = 0;

    return arg.len;
}

static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
    seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
               rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
               rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
               rt6_stats.fib_rt_cache,
               atomic_read(&ip6_dst_ops.entries),
               rt6_stats.fib_discarded_routes);

    return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
    return single_open(file, rt6_stats_seq_show, NULL);
}

static const struct file_operations rt6_stats_seq_fops = {
    .owner   = THIS_MODULE,
    .open    = rt6_stats_seq_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};
#endif	/* CONFIG_PROC_FS */
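/*
 * Added note: the two proc files created in ip6_route_init() expose
 * this state at run time.  /proc/net/ipv6_route is produced by
 * rt6_info_route() above (destination and source prefixes, next hop,
 * metric, refcount, use count, flags and device per line), while
 * /proc/net/rt6_stats prints, in order, fib_nodes, fib_route_nodes,
 * fib_rt_alloc, fib_rt_entries, fib_rt_cache, the number of dst
 * entries and fib_discarded_routes.
 */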
#ifdef CONFIG_SYSCTL

static int flush_delay;

int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
                              void __user *buffer, size_t *lenp, loff_t *ppos)
{
    if (write) {
        proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
        fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
        return 0;
    } else
        return -EINVAL;
}

ctl_table ipv6_route_table[] = {
    {
        .ctl_name     = NET_IPV6_ROUTE_FLUSH,
        .procname     = "flush",
        .data         = &flush_delay,
        .maxlen       = sizeof(int),
        .proc_handler = &ipv6_sysctl_rtcache_flush
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_GC_THRESH,
        .procname     = "gc_thresh",
        .data         = &ip6_dst_ops.gc_thresh,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_MAX_SIZE,
        .procname     = "max_size",
        .data         = &ip6_rt_max_size,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
        .procname     = "gc_min_interval",
        .data         = &ip6_rt_gc_min_interval,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec_jiffies,
        .strategy     = &sysctl_jiffies,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_GC_TIMEOUT,
        .procname     = "gc_timeout",
        .data         = &ip6_rt_gc_timeout,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec_jiffies,
        .strategy     = &sysctl_jiffies,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_GC_INTERVAL,
        .procname     = "gc_interval",
        .data         = &ip6_rt_gc_interval,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec_jiffies,
        .strategy     = &sysctl_jiffies,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_GC_ELASTICITY,
        .procname     = "gc_elasticity",
        .data         = &ip6_rt_gc_elasticity,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec_jiffies,
        .strategy     = &sysctl_jiffies,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_MTU_EXPIRES,
        .procname     = "mtu_expires",
        .data         = &ip6_rt_mtu_expires,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec_jiffies,
        .strategy     = &sysctl_jiffies,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_MIN_ADVMSS,
        .procname     = "min_adv_mss",
        .data         = &ip6_rt_min_advmss,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec_jiffies,
        .strategy     = &sysctl_jiffies,
    },
    {
        .ctl_name     = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
        .procname     = "gc_min_interval_ms",
        .data         = &ip6_rt_gc_min_interval,
        .maxlen       = sizeof(int),
        .proc_handler = &proc_dointvec_ms_jiffies,
        .strategy     = &sysctl_ms_jiffies,
    },
    { .ctl_name = 0 }
};

#endif
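/*
 * Added note (assuming the table is registered under the usual
 * net.ipv6.route sysctl directory): the entries above surface the
 * tunables defined at the top of this file, e.g.
 * /proc/sys/net/ipv6/route/gc_thresh, .../max_size and
 * .../mtu_expires; gc_min_interval_ms is the same variable as
 * gc_min_interval, only interpreted in milliseconds via
 * proc_dointvec_ms_jiffies.
 */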
void __init ip6_route_init(void)
{
#ifdef CONFIG_PROC_FS
    struct proc_dir_entry *p;
#endif
    ip6_dst_ops.kmem_cachep =
        kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
                          SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
    ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;

#ifdef CONFIG_PROC_FS
    p = proc_net_create("ipv6_route", 0, rt6_proc_info);
    if (p)
        p->owner = THIS_MODULE;

    proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
    fib6_rules_init();
#endif

    __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
}

void ip6_route_cleanup(void)
{
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
    fib6_rules_cleanup();
#endif
#ifdef CONFIG_PROC_FS
    proc_net_remove("ipv6_route");
    proc_net_remove("rt6_stats");
#endif
    kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}