/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Fixed routing subtrees.
 */
#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <linux/rtnetlink.h>
#include <net/dst_metadata.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>
#include <asm/uaccess.h>
#include <linux/sysctl.h>
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
static void		ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int		rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
static struct rt6_info *ip6_dst_alloc(struct net *net,
				      struct net_device *dev,
				      int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p = NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}
static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}
static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}
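/* Illustrative note: with three equal-cost paths, rt6i_nsiblings is 2
 * for each route, so candidate_count is 3 and the hash picks index 0,
 * 1 or 2.  Index 0 keeps the first match; a non-zero index walks that
 * many entries down the sibling list in rt6_multipath_select() below.
 */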
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route, if route_choosen == 0
	 * (siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_choosen--;
			if (route_choosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
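/* Note on scoring: rt6_check_dev() contributes 0-2 in the low bits
 * (2 for an exact oif match), while the two RFC 4191 preference bits
 * land in bits 2-3, so router preference dominates the device match
 * when scores are compared in find_match() below.
 */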
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
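/* rt6_select() implements the policy described at the top of this
 * file: keep returning the same (probably reachable) router, and only
 * advance fn->rr_ptr to round-robin the list when find_rr_leaf()
 * signals do_rr, i.e. when no default router was known reachable.
 */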
static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);
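/* Minimal usage sketch (hypothetical caller):
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *	if (rt) {
 *		... use rt->dst.dev ...
 *		ip6_rt_put(rt);
 *	}
 *
 * rt6_lookup() returns NULL on error and a referenced entry on
 * success, so the caller is expected to drop it with ip6_rt_put().
 */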
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed. In any case, if the caller does not hold the
 * reference, the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
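/* rt6_make_pcpu_route() below installs a per-cpu clone without a lock
 * on the slot itself: cmpxchg(p, NULL, pcpu_rt) only succeeds if the
 * slot is still empty, so a concurrent winner is detected and its
 * clone reused while ours is destroyed.
 */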
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway. The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;
	}
}
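/* ip6_pol_route() above returns via one of three paths: the fib6
 * entry itself (the null entry or an RTF_CACHE clone), an uncached
 * RTF_CACHE clone for the FLOWI_FLAG_KNOWN_NH case, or a per-cpu
 * copy.  The input/output wrappers below differ only in whether the
 * iif or the oif is fed into the lookup.
 */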
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = l3mdev_fib_oif(skb->dev),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				   struct flowi6 *fl6)
{
	struct dst_entry *dst;
	int flags = 0;
	bool any_src;

	dst = l3mdev_rt6_dst_by_oif(net, fl6);
	if (dst)
		return dst;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);
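/* Typical usage sketch (hypothetical caller): fill a flowi6 and check
 * dst->error on the result, since ip6_route_output() never returns
 * NULL:
 *
 *	struct flowi6 fl6 = { .daddr = daddr, .flowi6_oif = oif };
 *	struct dst_entry *dst = ip6_route_output(net, sk, &fl6);
 *	int err = dst->error;
 *
 *	if (err) {
 *		dst_release(dst);
 *		return err;
 *	}
 */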
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
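/* ip6_blackhole_route() hands back a clone whose input and output
 * hooks simply discard packets; it lets a caller keep a valid dst
 * (metrics, idev) for an entry whose traffic must be dropped.
 */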
/*
 *	Destination cache support functions
 */

static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (!__rt6_check_expired(rt) &&
	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
	return !(rt->rt6i_flags & RTF_CACHE) &&
		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from an appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};
static struct dst_entry *ip6_route_redirect(struct net *net,
					    const struct flowi6 *fl6,
					    const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6,
				flags, __ip6_route_redirect);
}
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
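/* Example: on a 1500 byte MTU link, ip6_default_advmss() yields
 * 1500 - 40 (ipv6hdr) - 20 (tcphdr) = 1440 bytes, clamped from below
 * by the ip6_rt_min_advmss sysctl and from above by IPV6_MAXPLEN.
 */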
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
	return entries > rt_max_size;
}
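/* ip6_dst_gc() adapts its pressure: ip6_rt_gc_expire grows by one on
 * every forced run, is reset to half of ip6_rt_gc_timeout once the
 * entry count drops below gc_thresh, and otherwise decays by
 * 1/2^ip6_rt_gc_elasticity on each call.
 */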
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
err:
	kfree(mp);
	return -EINVAL;
}
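/* Metrics are carried in an RTAX_MAX-sized array indexed by
 * (type - 1), with mxc->mx_valid marking which slots were supplied;
 * RTAX_CC_ALGO arrives as a congestion-control name string and is
 * converted to its key here.
 */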
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt;
	int err;

	rt = ip6_route_info_create(cfg);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto out;
	}

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}
*rt
, struct nl_info
*info
)
2038 struct fib6_table
*table
;
2039 struct net
*net
= dev_net(rt
->dst
.dev
);
2041 if (rt
== net
->ipv6
.ip6_null_entry
||
2042 rt
->dst
.flags
& DST_NOCACHE
) {
2047 table
= rt
->rt6i_table
;
2048 write_lock_bh(&table
->tb6_lock
);
2049 err
= fib6_del(rt
, info
);
2050 write_unlock_bh(&table
->tb6_lock
);
2057 int ip6_del_rt(struct rt6_info
*rt
)
2059 struct nl_info info
= {
2060 .nl_net
= dev_net(rt
->dst
.dev
),
2062 return __ip6_del_rt(rt
, &info
);
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
/*
 *	Misc support functions
 */

static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
2324 struct rt6_info
*rt
;
2325 struct fib6_table
*table
;
2327 table
= fib6_get_table(dev_net(dev
), RT6_TABLE_DFLT
);
2331 read_lock_bh(&table
->tb6_lock
);
2332 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
2333 if (dev
== rt
->dst
.dev
&&
2334 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
2335 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
2340 read_unlock_bh(&table
->tb6_lock
);
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
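/* rt6_add_dflt_router() and rt6_get_dflt_router() back the Router
 * Advertisement code: a default router learned from an RA is added
 * with RTF_ADDRCONF | RTF_DEFAULT and purged below when the accept_ra
 * configuration no longer allows it.
 */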
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
			 : RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
}
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
					    DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);
	rt->dst.flags |= DST_NOCACHE;

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
int ip6_route_get_saddr(struct net *net,
			struct rt6_info *rt,
			const struct in6_addr *daddr,
			unsigned int prefs,
			struct in6_addr *saddr)
{
	struct inet6_dev *idev =
		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
	int err = 0;

	if (rt && rt->rt6i_prefsrc.plen)
		*saddr = rt->rt6i_prefsrc.addr;
	else
		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
					 daddr, prefs, saddr);

	return err;
}
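
/* A preferred source address configured on the route (RTA_PREFSRC) wins
 * outright; otherwise ipv6_dev_get_saddr() runs standard source address
 * selection (RFC 6724), scoped to the route's device when one is known.
 */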
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	if (((void *)rt->dst.dev == dev || !dev) &&
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}

void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove routers and update dst entries when a gateway turns into a host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}
	return 0;
}

void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
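
/* Called (e.g. from ndisc) when a neighbor stops being a router: RA-learned
 * router routes and cached gateway routes through that address are removed,
 * since the fib6_clean_all() walker deletes entries for which the callback
 * returns a negative value.
 */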
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry)
		return -1;

	return 0;
}

void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
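
/* Invoked from address configuration teardown; a NULL dev (as used when a
 * network namespace is dismantled) matches every device, purging all routes
 * except the per-netns null entry.
 */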
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6, PMTU discovery is not optional,
	   so a locked RTAX_MTU metric cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For an administrative MTU increase, there is no way to discover
	   an IPv6 PMTU increase, so the PMTU must be updated here.
	   Since RFC 1981 doesn't cover administrative MTU increases,
	   updating on a PMTU increase is a MUST (e.g. for jumbo frames).
	 */
	/*
	   If the new MTU is less than the route PMTU, the new MTU will be
	   the lowest MTU in the path; update the route PMTU to reflect the
	   decrease. If the new MTU is greater than the route PMTU, and the
	   old MTU was the lowest MTU in the path, update the route PMTU to
	   reflect the increase. In that case, if another node's MTU is now
	   the lowest in the path, a Packet Too Big message will trigger
	   PMTU discovery again.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from have already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}

void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
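
/* Worked example: lowering eth0's MTU from 1500 to 1280 clamps a cached
 * route's learned rt6i_pmtu from 1500 down to 1280 and rewrites RTAX_MTU on
 * non-cached routes through eth0. Raising it back to 1500 only updates
 * routes whose MTU still equals the old device MTU (idev->cnf.mtu6 is not
 * yet updated when this walker runs), so a smaller PMTU learned from the
 * path is left alone.
 */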
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]		= { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
};
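
/* Policy semantics: entries with only .len set require at least that many
 * bytes of payload, while typed entries are validated by nlmsg_parse()
 * against the fixed size of the given netlink type.
 */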
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
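
/* Mapping example: "ip -6 route add 2001:db8::/64 via fe80::1 dev eth0
 * metric 1024" arrives as RTM_NEWROUTE with rtm_dst_len = 64 plus RTA_DST,
 * RTA_GATEWAY, RTA_OIF and RTA_PRIORITY attributes, which this function
 * folds into fc_dst/fc_dst_len, fc_gateway (setting RTF_GATEWAY),
 * fc_ifindex and fc_metric.
 */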
struct rt6_nh {
	struct rt6_info *rt6_info;
	struct fib6_config r_cfg;
	struct mx6_config mxc;
	struct list_head next;
};

static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
{
	struct rt6_nh *nh;

	list_for_each_entry(nh, rt6_nh_list, next) {
		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
			&nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
			nh->r_cfg.fc_ifindex);
	}
}

static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	struct rt6_info *rtnh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		rtnh = nh->rt6_info;

		if (rtnh->dst.dev == rt->dst.dev &&
		    rtnh->rt6i_idev == rt->rt6i_idev &&
		    ipv6_addr_equal(&rtnh->rt6i_gateway,
				    &rt->rt6i_gateway))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			dst_free(&rt->dst);
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by the first new one, and the
		 * rest should be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
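
/* Illustrative usage (iproute2): a single RTM_NEWROUTE carrying an
 * RTA_MULTIPATH attribute is what e.g.
 *   ip -6 route add 2001:db8::/64 \
 *          nexthop via fe80::1 dev eth0 nexthop via fe80::2 dev eth0
 * generates; each rtnexthop in the attribute becomes one rt6_info here and
 * is inserted individually, which is why NLM_F_EXCL/NLM_F_REPLACE are
 * cleared after the first nexthop.
 */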
static int ip6_route_multipath_del(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 1, last_err = 0;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = ip6_route_del(&r_cfg);
		if (err)
			last_err = err;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath_del(&cfg);
	else
		return ip6_route_del(&cfg);
}

static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath_add(&cfg);
	else
		return ip6_route_add(&cfg);
}
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
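
/* rt6_nlmsg_size() is a worst-case upper bound for one route message;
 * inet6_rt_notify() sizes its skb with it, so rt6_fill_node() returning
 * -EMSGSIZE there indicates the bound (not the caller) is wrong.
 */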
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (iif) {
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		if (netif_index_is_l3_master(net, oif)) {
			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
					   FLOWI_FLAG_SKIP_NH_OIF;
		}

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through a good chunk of the routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
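
/* This implements e.g. "ip -6 route get 2001:db8::1": the request is parsed
 * into a flow, a real lookup is performed (input-style if RTA_IIF was given,
 * output-style otherwise), and the resulting route is echoed back to the
 * requester as a unicast RTM_NEWROUTE message.
 */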
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->portid, seq, 0, 0, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}
/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;

	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}

static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
#endif	/* CONFIG_PROC_FS */
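
/* "cat /proc/net/rt6_stats" prints the seven counters above as bare hex
 * fields, while /proc/net/ipv6_route lists the routing table entries; both
 * files are registered per-netns in ip6_route_net_init_late() below.
 */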
#ifdef CONFIG_SYSCTL

static
int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;

	if (!write)
		return -EINVAL;

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	proc_dointvec(ctl, write, buffer, lenp, ppos);
	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
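
/* These appear under /proc/sys/net/ipv6/route/; e.g.
 *   sysctl -w net.ipv6.route.flush=1
 * triggers a garbage-collection pass via ipv6_sysctl_rtcache_flush().
 */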
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
#endif
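
/* Note: the table[N].data fix-ups above are positional; they must stay in
 * the same order as the entries in ipv6_route_table_template.
 */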
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}