Merge branch 'fix/hda' into topic/hda
[linux/fpc-iii.git] / net / ipv6 / route.c
blob843406f14d7b2b37ac33b039f52fe80feb132cba
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
100 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
127 return p;
130 static struct dst_ops ip6_dst_ops_template = {
131 .family = AF_INET6,
132 .protocol = cpu_to_be16(ETH_P_IPV6),
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
136 .default_advmss = ip6_default_advmss,
137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
144 .local_out = __ip6_local_out,
/* default_mtu hook for blackhole routes: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
152 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
156 static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
158 .protocol = cpu_to_be16(ETH_P_IPV6),
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
161 .default_mtu = ip6_blackhole_default_mtu,
162 .default_advmss = ip6_default_advmss,
163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
166 static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
170 static struct rt6_info ip6_null_entry_template = {
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_protocol = RTPROT_KERNEL,
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
185 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
187 static int ip6_pkt_prohibit(struct sk_buff *skb);
188 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
190 static struct rt6_info ip6_prohibit_entry_template = {
191 .dst = {
192 .__refcnt = ATOMIC_INIT(1),
193 .__use = 1,
194 .obsolete = -1,
195 .error = -EACCES,
196 .input = ip6_pkt_prohibit,
197 .output = ip6_pkt_prohibit_out,
199 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
200 .rt6i_protocol = RTPROT_KERNEL,
201 .rt6i_metric = ~(u32) 0,
202 .rt6i_ref = ATOMIC_INIT(1),
205 static struct rt6_info ip6_blk_hole_entry_template = {
206 .dst = {
207 .__refcnt = ATOMIC_INIT(1),
208 .__use = 1,
209 .obsolete = -1,
210 .error = -EINVAL,
211 .input = dst_discard,
212 .output = dst_discard,
214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
215 .rt6i_protocol = RTPROT_KERNEL,
216 .rt6i_metric = ~(u32) 0,
217 .rt6i_ref = ATOMIC_INIT(1),
220 #endif
/* Allocate a dst from @ops and hand it back typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops, 0);

	return rt;
}
228 static void ip6_dst_destroy(struct dst_entry *dst)
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
232 struct inet_peer *peer = rt->rt6i_peer;
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
238 if (peer) {
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
244 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
246 static u32 rt6_peer_genid(void)
248 return atomic_read(&__rt6_peer_genid);
251 void rt6_bind_peer(struct rt6_info *rt, int create)
253 struct inet_peer *peer;
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
262 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
267 struct net_device *loopback_dev =
268 dev_net(dev)->loopback_dev;
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
280 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
286 static inline int rt6_need_strict(struct in6_addr *daddr)
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
293 * Route lookup. Any table->tb6_lock is implied.
296 static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
298 struct in6_addr *saddr,
299 int oif,
300 int flags)
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
309 struct net_device *dev = sprt->rt6i_dev;
311 if (oif) {
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
317 if (flags & RT6_LOOKUP_F_IFACE && oif)
318 continue;
319 if (local && (!oif ||
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
323 local = sprt;
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
332 if (oif) {
333 if (local)
334 return local;
336 if (flags & RT6_LOOKUP_F_IFACE)
337 return net->ipv6.ip6_null_entry;
339 out:
340 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * next hop of @rt when its neighbour entry is not in a NUD_VALID state
 * and the last update is older than the interface's rtr_probe_interval.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the write side of the lock
	 * is required; with only the read side two callers could both pass
	 * the time check and each send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* The solicitation itself is sent without the lock held. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
379 * Default Router Selection (RFC 2461 6.3.6)
381 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
383 struct net_device *dev = rt->rt6i_dev;
384 if (!oif || dev->ifindex == oif)
385 return 2;
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
392 static inline int rt6_check_neigh(struct rt6_info *rt)
394 struct neighbour *neigh = rt->rt6i_nexthop;
395 int m;
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
402 m = 2;
403 #ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406 #endif
407 else
408 m = 1;
409 read_unlock_bh(&neigh->lock);
410 } else
411 m = 0;
412 return m;
415 static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
418 int m, n;
420 m = rt6_check_dev(rt, oif);
421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
422 return -1;
423 #ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425 #endif
426 n = rt6_check_neigh(rt);
427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
428 return -1;
429 return m;
432 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
435 int m;
437 if (rt6_check_expired(rt))
438 goto out;
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
453 out:
454 return match;
457 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
461 struct rt6_info *rt, *match;
462 int mpri = -1;
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
466 rt = rt->dst.rt6_next)
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
469 rt = rt->dst.rt6_next)
470 match = find_match(rt, oif, strict, &mpri, match);
472 return match;
475 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
477 struct rt6_info *match, *rt0;
478 struct net *net;
480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
481 __func__, fn->leaf, oif);
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
489 if (!match &&
490 (strict & RT6_LOOKUP_F_REACHABLE)) {
491 struct rt6_info *next = rt0->dst.rt6_next;
493 /* no entries matched; do round-robin */
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
497 if (next != rt0)
498 fn->rr_ptr = next;
501 RT6_TRACE("%s() => %p\n",
502 __func__, match);
504 net = dev_net(rt0->rt6i_dev);
505 return match ? match : net->ipv6.ip6_null_entry;
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @dev:    receiving device
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Adds, refreshes or (for a zero lifetime) deletes the matching route.
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
/*
 * BACKTRACK(__net, saddr) - climb the fib tree after a failed selection.
 *
 * Not self-contained: it reads and writes the caller's locals `rt` and
 * `fn`, and jumps to the caller's labels `out` and `restart`.
 *
 * It only acts when `rt` is the namespace's null entry.  It then walks
 * towards the root: at each step it takes fn->parent, and if that parent
 * has a subtree other than the current node it looks @saddr up in that
 * subtree instead.  Reaching a node flagged RTN_TL_ROOT ends the walk
 * with `goto out`; reaching a node flagged RTN_RTINFO retries selection
 * with `goto restart`.
 */
582 #define BACKTRACK(__net, saddr) \
583 do { \
584 if (rt == __net->ipv6.ip6_null_entry) { \
585 struct fib6_node *pn; \
586 while (1) { \
587 if (fn->fn_flags & RTN_TL_ROOT) \
588 goto out; \
589 pn = fn->parent; \
590 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
591 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
592 else \
593 fn = pn; \
594 if (fn->fn_flags & RTN_RTINFO) \
595 goto restart; \
598 } while(0)
600 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
602 struct flowi6 *fl6, int flags)
604 struct fib6_node *fn;
605 struct rt6_info *rt;
607 read_lock_bh(&table->tb6_lock);
608 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
609 restart:
610 rt = fn->leaf;
611 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
612 BACKTRACK(net, &fl6->saddr);
613 out:
614 dst_use(&rt->dst, jiffies);
615 read_unlock_bh(&table->tb6_lock);
616 return rt;
620 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
623 struct flowi6 fl6 = {
624 .flowi6_oif = oif,
625 .daddr = *daddr,
627 struct dst_entry *dst;
628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
630 if (saddr) {
631 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
635 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
639 dst_release(dst);
641 return NULL;
644 EXPORT_SYMBOL(rt6_lookup);
646 /* ip6_ins_rt is called with FREE table->tb6_lock.
647 It takes new route entry, the addition fails by any reason the
648 route is freed. In any case, if caller does not hold it, it may
649 be destroyed.
652 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
654 int err;
655 struct fib6_table *table;
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
659 err = fib6_add(&table->tb6_root, rt, info);
660 write_unlock_bh(&table->tb6_lock);
662 return err;
665 int ip6_ins_rt(struct rt6_info *rt)
667 struct nl_info info = {
668 .nl_net = dev_net(rt->rt6i_dev),
670 return __ip6_ins_rt(rt, &info);
/*
 * rt6_alloc_cow - build a /128 cache copy of @ort for @daddr and bind a
 * neighbour entry to it.
 *
 * @ort:   route to copy (via ip6_rt_copy())
 * @daddr: destination the copy is for
 * @saddr: source address; used only with CONFIG_IPV6_SUBTREES when the
 *         copy has a source prefix
 *
 * Returns the new route with rt6i_nexthop set, or NULL when the copy
 * could not be made or no neighbour entry could be obtained.
 */
673 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
676 struct rt6_info *rt;
679 * Clone the route.
682 rt = ip6_rt_copy(ort);
684 if (rt) {
685 struct neighbour *neigh;
/* One gc-and-retry is allowed outside softirq context, none inside it. */
686 int attempts = !in_softirq();
/*
 * Non-gateway route: the destination itself becomes the next hop.  If
 * the route is not already a /128 and its prefix address equals @daddr,
 * the copy is flagged RTF_ANYCAST.
 */
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
/* Turn the copy into a host (/128) cache entry for @daddr. */
695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
698 rt->dst.flags |= DST_HOST;
700 #ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
705 #endif
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
/*
 * Neighbour lookup failed: if a retry is left, run ip6_dst_gc() with
 * elasticity 1 and min interval 0, restore both sysctls, and try again.
 */
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
/* No retries left: warn (rate-limited), free the copy and give up. */
729 if (net_ratelimit())
730 printk(KERN_WARNING
731 "ipv6: Neighbour table overflow.\n");
732 dst_free(&rt->dst);
733 return NULL;
735 rt->rt6i_nexthop = neigh;
739 return rt;
742 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
749 rt->dst.flags |= DST_HOST;
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
752 return rt;
/*
 * ip6_pol_route - main route resolution for input and output lookups.
 *
 * @net:   namespace
 * @table: fib table to search
 * @oif:   interface index used for route selection
 * @fl6:   flow being routed
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is carried into
 *         the selection
 *
 * Returns a route with a reference held for the caller; this may be the
 * namespace's null entry.  A route that is neither the null entry nor a
 * cache entry is replaced by a per-destination copy that is inserted
 * into the table.
 */
755 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
756 struct flowi6 *fl6, int flags)
758 struct fib6_node *fn;
759 struct rt6_info *rt, *nrt;
760 int strict = 0;
/* Bounds the number of copy-and-insert rounds. */
761 int attempts = 3;
762 int err;
/* The first pass insists on reachable routers unless forwarding is on. */
763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
765 strict |= flags & RT6_LOOKUP_F_IFACE;
767 relookup:
768 read_lock_bh(&table->tb6_lock);
770 restart_2:
771 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
/* `restart` and `out` are also jump targets of BACKTRACK(). */
773 restart:
774 rt = rt6_select(fn, oif, strict | reachable);
776 BACKTRACK(net, &fl6->saddr);
777 if (rt == net->ipv6.ip6_null_entry ||
778 rt->rt6i_flags & RTF_CACHE)
779 goto out;
/* Pin the route, then drop the table lock before allocating a copy. */
781 dst_hold(&rt->dst);
782 read_unlock_bh(&table->tb6_lock);
/*
 * No next hop bound and one is needed: copy and resolve a neighbour.
 * Otherwise a non-host route gets a plain clone; a host route is used
 * as it is.
 */
784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
785 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
786 else if (!(rt->dst.flags & DST_HOST))
787 nrt = rt6_alloc_clone(rt, &fl6->daddr);
788 else
789 goto out2;
/* Swap the reference over to the copy, or to the null entry on failure. */
791 dst_release(&rt->dst);
792 rt = nrt ? : net->ipv6.ip6_null_entry;
794 dst_hold(&rt->dst);
795 if (nrt) {
796 err = ip6_ins_rt(nrt);
797 if (!err)
798 goto out2;
801 if (--attempts <= 0)
802 goto out2;
805 * Race condition! In the gap, when table->tb6_lock was
806 * released someone could insert this route. Relookup.
808 dst_release(&rt->dst);
809 goto relookup;
/*
 * Null entry or cache entry found.  If the reachability requirement is
 * still set, clear it and repeat the lookup once before accepting.
 */
811 out:
812 if (reachable) {
813 reachable = 0;
814 goto restart_2;
816 dst_hold(&rt->dst);
817 read_unlock_bh(&table->tb6_lock);
/* Common exit: record the use of the returned route. */
818 out2:
819 rt->dst.lastuse = jiffies;
820 rt->dst.__use++;
822 return rt;
825 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
826 struct flowi6 *fl6, int flags)
828 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
831 void ip6_route_input(struct sk_buff *skb)
833 struct ipv6hdr *iph = ipv6_hdr(skb);
834 struct net *net = dev_net(skb->dev);
835 int flags = RT6_LOOKUP_F_HAS_SADDR;
836 struct flowi6 fl6 = {
837 .flowi6_iif = skb->dev->ifindex,
838 .daddr = iph->daddr,
839 .saddr = iph->saddr,
840 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
841 .flowi6_mark = skb->mark,
842 .flowi6_proto = iph->nexthdr,
845 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
846 flags |= RT6_LOOKUP_F_IFACE;
848 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
851 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
852 struct flowi6 *fl6, int flags)
854 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
857 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
858 struct flowi6 *fl6)
860 int flags = 0;
862 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
863 flags |= RT6_LOOKUP_F_IFACE;
865 if (!ipv6_addr_any(&fl6->saddr))
866 flags |= RT6_LOOKUP_F_HAS_SADDR;
867 else if (sk)
868 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
870 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
873 EXPORT_SYMBOL(ip6_route_output);
875 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
877 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
878 struct rt6_info *ort = (struct rt6_info *) dst_orig;
879 struct dst_entry *new = NULL;
881 if (rt) {
882 new = &rt->dst;
884 new->__use = 1;
885 new->input = dst_discard;
886 new->output = dst_discard;
888 dst_copy_metrics(new, &ort->dst);
889 new->dev = ort->dst.dev;
890 if (new->dev)
891 dev_hold(new->dev);
892 rt->rt6i_idev = ort->rt6i_idev;
893 if (rt->rt6i_idev)
894 in6_dev_hold(rt->rt6i_idev);
895 rt->rt6i_expires = 0;
897 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
898 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
899 rt->rt6i_metric = 0;
901 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
902 #ifdef CONFIG_IPV6_SUBTREES
903 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
904 #endif
906 dst_free(new);
909 dst_release(dst_orig);
910 return new ? new : ERR_PTR(-ENOMEM);
914 * Destination cache support functions
917 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
919 struct rt6_info *rt;
921 rt = (struct rt6_info *) dst;
923 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
924 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
925 if (!rt->rt6i_peer)
926 rt6_bind_peer(rt, 0);
927 rt->rt6i_peer_genid = rt6_peer_genid();
929 return dst;
931 return NULL;
934 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
936 struct rt6_info *rt = (struct rt6_info *) dst;
938 if (rt) {
939 if (rt->rt6i_flags & RTF_CACHE) {
940 if (rt6_check_expired(rt)) {
941 ip6_del_rt(rt);
942 dst = NULL;
944 } else {
945 dst_release(dst);
946 dst = NULL;
949 return dst;
952 static void ip6_link_failure(struct sk_buff *skb)
954 struct rt6_info *rt;
956 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
958 rt = (struct rt6_info *) skb_dst(skb);
959 if (rt) {
960 if (rt->rt6i_flags&RTF_CACHE) {
961 dst_set_expires(&rt->dst, 0);
962 rt->rt6i_flags |= RTF_EXPIRES;
963 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
964 rt->rt6i_node->fn_sernum = -1;
968 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
970 struct rt6_info *rt6 = (struct rt6_info*)dst;
972 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
973 rt6->rt6i_flags |= RTF_MODIFIED;
974 if (mtu < IPV6_MIN_MTU) {
975 u32 features = dst_metric(dst, RTAX_FEATURES);
976 mtu = IPV6_MIN_MTU;
977 features |= RTAX_FEATURE_ALLFRAG;
978 dst_metric_set(dst, RTAX_FEATURES, features);
980 dst_metric_set(dst, RTAX_MTU, mtu);
984 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
986 struct net_device *dev = dst->dev;
987 unsigned int mtu = dst_mtu(dst);
988 struct net *net = dev_net(dev);
990 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
992 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
993 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
996 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
997 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
998 * IPV6_MAXPLEN is also valid and means: "any MSS,
999 * rely only on pmtu discovery"
1001 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1002 mtu = IPV6_MAXPLEN;
1003 return mtu;
1006 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1008 unsigned int mtu = IPV6_MIN_MTU;
1009 struct inet6_dev *idev;
1011 rcu_read_lock();
1012 idev = __in6_dev_get(dst->dev);
1013 if (idev)
1014 mtu = idev->cnf.mtu6;
1015 rcu_read_unlock();
1017 return mtu;
1020 static struct dst_entry *icmp6_dst_gc_list;
1021 static DEFINE_SPINLOCK(icmp6_dst_lock);
1023 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1024 struct neighbour *neigh,
1025 const struct in6_addr *addr)
1027 struct rt6_info *rt;
1028 struct inet6_dev *idev = in6_dev_get(dev);
1029 struct net *net = dev_net(dev);
1031 if (unlikely(idev == NULL))
1032 return NULL;
1034 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1035 if (unlikely(rt == NULL)) {
1036 in6_dev_put(idev);
1037 goto out;
1040 dev_hold(dev);
1041 if (neigh)
1042 neigh_hold(neigh);
1043 else {
1044 neigh = ndisc_get_neigh(dev, addr);
1045 if (IS_ERR(neigh))
1046 neigh = NULL;
1049 rt->rt6i_dev = dev;
1050 rt->rt6i_idev = idev;
1051 rt->rt6i_nexthop = neigh;
1052 atomic_set(&rt->dst.__refcnt, 1);
1053 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1054 rt->dst.output = ip6_output;
1056 #if 0 /* there's no chance to use these for ndisc */
1057 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1058 ? DST_HOST
1059 : 0;
1060 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1061 rt->rt6i_dst.plen = 128;
1062 #endif
1064 spin_lock_bh(&icmp6_dst_lock);
1065 rt->dst.next = icmp6_dst_gc_list;
1066 icmp6_dst_gc_list = &rt->dst;
1067 spin_unlock_bh(&icmp6_dst_lock);
1069 fib6_force_start_gc(net);
1071 out:
1072 return &rt->dst;
1075 int icmp6_dst_gc(void)
1077 struct dst_entry *dst, **pprev;
1078 int more = 0;
1080 spin_lock_bh(&icmp6_dst_lock);
1081 pprev = &icmp6_dst_gc_list;
1083 while ((dst = *pprev) != NULL) {
1084 if (!atomic_read(&dst->__refcnt)) {
1085 *pprev = dst->next;
1086 dst_free(dst);
1087 } else {
1088 pprev = &dst->next;
1089 ++more;
1093 spin_unlock_bh(&icmp6_dst_lock);
1095 return more;
1098 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1099 void *arg)
1101 struct dst_entry *dst, **pprev;
1103 spin_lock_bh(&icmp6_dst_lock);
1104 pprev = &icmp6_dst_gc_list;
1105 while ((dst = *pprev) != NULL) {
1106 struct rt6_info *rt = (struct rt6_info *) dst;
1107 if (func(rt, arg)) {
1108 *pprev = dst->next;
1109 dst_free(dst);
1110 } else {
1111 pprev = &dst->next;
1114 spin_unlock_bh(&icmp6_dst_lock);
1117 static int ip6_dst_gc(struct dst_ops *ops)
1119 unsigned long now = jiffies;
1120 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1121 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1122 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1123 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1124 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1125 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1126 int entries;
1128 entries = dst_entries_get_fast(ops);
1129 if (time_after(rt_last_gc + rt_min_interval, now) &&
1130 entries <= rt_max_size)
1131 goto out;
1133 net->ipv6.ip6_rt_gc_expire++;
1134 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1135 net->ipv6.ip6_rt_last_gc = now;
1136 entries = dst_entries_get_slow(ops);
1137 if (entries < ops->gc_thresh)
1138 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1139 out:
1140 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1141 return entries > rt_max_size;
1144 /* Clean host part of a prefix. Not necessary in radix tree,
1145 but results in cleaner routing tables.
1147 Remove it only when all the things will work!
1150 int ip6_dst_hoplimit(struct dst_entry *dst)
1152 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1153 if (hoplimit == 0) {
1154 struct net_device *dev = dst->dev;
1155 struct inet6_dev *idev;
1157 rcu_read_lock();
1158 idev = __in6_dev_get(dev);
1159 if (idev)
1160 hoplimit = idev->cnf.hop_limit;
1161 else
1162 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1163 rcu_read_unlock();
1165 return hoplimit;
1167 EXPORT_SYMBOL(ip6_dst_hoplimit);
/*
 * ip6_route_add - create a route from @cfg and insert it into its table.
 *
 * @cfg: route description.  fc_metric, fc_protocol and fc_nlinfo.nl_net
 *       may be rewritten here (defaults are filled in).
 *
 * Outline:
 *   - validate the prefix lengths and resolve fc_ifindex to a device
 *     and its inet6_dev (a reference is taken on each);
 *   - get the table and allocate the route;
 *   - choose the input handler from the destination address type and
 *     RTF_LOCAL;
 *   - turn RTF_REJECT routes, and non-local routes via a loopback
 *     device, into reject routes bound to the loopback device;
 *   - for RTF_GATEWAY, check the gateway address and, when it is not
 *     link-local unicast, require that rt6_lookup() finds a route to it
 *     that is itself not a gateway route;
 *   - bind a neighbour entry for RTF_GATEWAY / RTF_NONEXTHOP routes;
 *   - apply the metrics passed in fc_mx and insert the route.
 *
 * Returns the result of __ip6_ins_rt() on the success path, or a
 * negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH,
 * or the neighbour lookup error) after releasing dev, idev and the
 * route.
 */
1173 int ip6_route_add(struct fib6_config *cfg)
1175 int err;
1176 struct net *net = cfg->fc_nlinfo.nl_net;
1177 struct rt6_info *rt = NULL;
1178 struct net_device *dev = NULL;
1179 struct inet6_dev *idev = NULL;
1180 struct fib6_table *table;
1181 int addr_type;
/* Prefix lengths are in bits and cannot exceed 128. */
1183 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1184 return -EINVAL;
/* Source prefixes are only accepted when subtrees are compiled in. */
1185 #ifndef CONFIG_IPV6_SUBTREES
1186 if (cfg->fc_src_len)
1187 return -EINVAL;
1188 #endif
/* Resolve the interface; both references are dropped at `out` on error. */
1189 if (cfg->fc_ifindex) {
1190 err = -ENODEV;
1191 dev = dev_get_by_index(net, cfg->fc_ifindex);
1192 if (!dev)
1193 goto out;
1194 idev = in6_dev_get(dev);
1195 if (!idev)
1196 goto out;
/* A zero metric means "use the default user priority". */
1199 if (cfg->fc_metric == 0)
1200 cfg->fc_metric = IP6_RT_PRIO_USER;
1202 table = fib6_new_table(net, cfg->fc_table);
1203 if (table == NULL) {
1204 err = -ENOBUFS;
1205 goto out;
1208 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1210 if (rt == NULL) {
1211 err = -ENOMEM;
1212 goto out;
1215 rt->dst.obsolete = -1;
/* NOTE(review): the else-arm of this conditional is missing from this dump. */
1216 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1217 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1220 if (cfg->fc_protocol == RTPROT_UNSPEC)
1221 cfg->fc_protocol = RTPROT_BOOT;
1222 rt->rt6i_protocol = cfg->fc_protocol;
1224 addr_type = ipv6_addr_type(&cfg->fc_dst);
/* Input handler: multicast, local delivery, or forwarding. */
1226 if (addr_type & IPV6_ADDR_MULTICAST)
1227 rt->dst.input = ip6_mc_input;
1228 else if (cfg->fc_flags & RTF_LOCAL)
1229 rt->dst.input = ip6_input;
1230 else
1231 rt->dst.input = ip6_forward;
1233 rt->dst.output = ip6_output;
/* Store the destination prefix with its host bits cleared. */
1235 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1236 rt->rt6i_dst.plen = cfg->fc_dst_len;
1237 if (rt->rt6i_dst.plen == 128)
1238 rt->dst.flags = DST_HOST;
1240 #ifdef CONFIG_IPV6_SUBTREES
1241 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1242 rt->rt6i_src.plen = cfg->fc_src_len;
1243 #endif
1245 rt->rt6i_metric = cfg->fc_metric;
1247 /* We cannot add true routes via loopback here,
1248 they would result in kernel looping; promote them to reject routes
1250 if ((cfg->fc_flags & RTF_REJECT) ||
1251 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1252 && !(cfg->fc_flags&RTF_LOCAL))) {
1253 /* hold loopback dev/idev if we haven't done so. */
1254 if (dev != net->loopback_dev) {
1255 if (dev) {
1256 dev_put(dev);
1257 in6_dev_put(idev);
1259 dev = net->loopback_dev;
1260 dev_hold(dev);
1261 idev = in6_dev_get(dev);
1262 if (!idev) {
1263 err = -ENODEV;
1264 goto out;
1267 rt->dst.output = ip6_pkt_discard_out;
1268 rt->dst.input = ip6_pkt_discard;
1269 rt->dst.error = -ENETUNREACH;
1270 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1271 goto install_route;
/* Gateway routes: record the gateway and check that it is usable. */
1274 if (cfg->fc_flags & RTF_GATEWAY) {
1275 struct in6_addr *gw_addr;
1276 int gwa_type;
1278 gw_addr = &cfg->fc_gateway;
1279 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1280 gwa_type = ipv6_addr_type(gw_addr);
1282 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1283 struct rt6_info *grt;
1285 /* IPv6 strictly inhibits using not link-local
1286 addresses as nexthop address.
1287 Otherwise, router will not able to send redirects.
1288 It is very good, but in some (rare!) circumstances
1289 (SIT, PtP, NBMA NOARP links) it is handy to allow
1290 some exceptions. --ANK
1292 err = -EINVAL;
1293 if (!(gwa_type&IPV6_ADDR_UNICAST))
1294 goto out;
1296 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1298 err = -EHOSTUNREACH;
1299 if (grt == NULL)
1300 goto out;
1301 if (dev) {
1302 if (dev != grt->rt6i_dev) {
1303 dst_release(&grt->dst);
1304 goto out;
1306 } else {
1307 dev = grt->rt6i_dev;
1308 idev = grt->rt6i_idev;
1309 dev_hold(dev);
1310 in6_dev_hold(grt->rt6i_idev);
1312 if (!(grt->rt6i_flags&RTF_GATEWAY))
1313 err = 0;
1314 dst_release(&grt->dst);
1316 if (err)
1317 goto out;
1319 err = -EINVAL;
1320 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1321 goto out;
1324 err = -ENODEV;
1325 if (dev == NULL)
1326 goto out;
1328 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1329 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1330 if (IS_ERR(rt->rt6i_nexthop)) {
1331 err = PTR_ERR(rt->rt6i_nexthop);
1332 rt->rt6i_nexthop = NULL;
1333 goto out;
1337 rt->rt6i_flags = cfg->fc_flags;
1339 install_route:
1340 if (cfg->fc_mx) {
1341 struct nlattr *nla;
1342 int remaining;
1344 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1345 int type = nla_type(nla);
1347 if (type) {
1348 if (type > RTAX_MAX) {
1349 err = -EINVAL;
1350 goto out;
1353 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1358 rt->dst.dev = dev;
1359 rt->rt6i_idev = idev;
1360 rt->rt6i_table = table;
1362 cfg->fc_nlinfo.nl_net = dev_net(dev);
1364 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1366 out:
1367 if (dev)
1368 dev_put(dev);
1369 if (idev)
1370 in6_dev_put(idev);
1371 if (rt)
1372 dst_free(&rt->dst);
1373 return err;
1376 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1378 int err;
1379 struct fib6_table *table;
1380 struct net *net = dev_net(rt->rt6i_dev);
1382 if (rt == net->ipv6.ip6_null_entry)
1383 return -ENOENT;
1385 table = rt->rt6i_table;
1386 write_lock_bh(&table->tb6_lock);
1388 err = fib6_del(rt, info);
1389 dst_release(&rt->dst);
1391 write_unlock_bh(&table->tb6_lock);
1393 return err;
1396 int ip6_del_rt(struct rt6_info *rt)
1398 struct nl_info info = {
1399 .nl_net = dev_net(rt->rt6i_dev),
1401 return __ip6_del_rt(rt, &info);
1404 static int ip6_route_del(struct fib6_config *cfg)
1406 struct fib6_table *table;
1407 struct fib6_node *fn;
1408 struct rt6_info *rt;
1409 int err = -ESRCH;
1411 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1412 if (table == NULL)
1413 return err;
1415 read_lock_bh(&table->tb6_lock);
1417 fn = fib6_locate(&table->tb6_root,
1418 &cfg->fc_dst, cfg->fc_dst_len,
1419 &cfg->fc_src, cfg->fc_src_len);
1421 if (fn) {
1422 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1423 if (cfg->fc_ifindex &&
1424 (rt->rt6i_dev == NULL ||
1425 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1426 continue;
1427 if (cfg->fc_flags & RTF_GATEWAY &&
1428 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1429 continue;
1430 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1431 continue;
1432 dst_hold(&rt->dst);
1433 read_unlock_bh(&table->tb6_lock);
1435 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1438 read_unlock_bh(&table->tb6_lock);
1440 return err;
/*
 *	Handle redirects
 */
1446 struct ip6rd_flowi {
1447 struct flowi6 fl6;
1448 struct in6_addr gateway;
1451 static struct rt6_info *__ip6_route_redirect(struct net *net,
1452 struct fib6_table *table,
1453 struct flowi6 *fl6,
1454 int flags)
1456 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1457 struct rt6_info *rt;
1458 struct fib6_node *fn;
1461 * Get the "current" route for this destination and
1462 * check if the redirect has come from approriate router.
1464 * RFC 2461 specifies that redirects should only be
1465 * accepted if they come from the nexthop to the target.
1466 * Due to the way the routes are chosen, this notion
1467 * is a bit fuzzy and one might need to check all possible
1468 * routes.
1471 read_lock_bh(&table->tb6_lock);
1472 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1473 restart:
1474 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1476 * Current route is on-link; redirect is always invalid.
1478 * Seems, previous statement is not true. It could
1479 * be node, which looks for us as on-link (f.e. proxy ndisc)
1480 * But then router serving it might decide, that we should
1481 * know truth 8)8) --ANK (980726).
1483 if (rt6_check_expired(rt))
1484 continue;
1485 if (!(rt->rt6i_flags & RTF_GATEWAY))
1486 continue;
1487 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1488 continue;
1489 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1490 continue;
1491 break;
1494 if (!rt)
1495 rt = net->ipv6.ip6_null_entry;
1496 BACKTRACK(net, &fl6->saddr);
1497 out:
1498 dst_hold(&rt->dst);
1500 read_unlock_bh(&table->tb6_lock);
1502 return rt;
1505 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1506 struct in6_addr *src,
1507 struct in6_addr *gateway,
1508 struct net_device *dev)
1510 int flags = RT6_LOOKUP_F_HAS_SADDR;
1511 struct net *net = dev_net(dev);
1512 struct ip6rd_flowi rdfl = {
1513 .fl6 = {
1514 .flowi6_oif = dev->ifindex,
1515 .daddr = *dest,
1516 .saddr = *src,
1520 ipv6_addr_copy(&rdfl.gateway, gateway);
1522 if (rt6_need_strict(dest))
1523 flags |= RT6_LOOKUP_F_IFACE;
1525 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1526 flags, __ip6_route_redirect);
1529 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1530 struct in6_addr *saddr,
1531 struct neighbour *neigh, u8 *lladdr, int on_link)
1533 struct rt6_info *rt, *nrt = NULL;
1534 struct netevent_redirect netevent;
1535 struct net *net = dev_net(neigh->dev);
1537 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1539 if (rt == net->ipv6.ip6_null_entry) {
1540 if (net_ratelimit())
1541 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1542 "for redirect target\n");
1543 goto out;
1547 * We have finally decided to accept it.
1550 neigh_update(neigh, lladdr, NUD_STALE,
1551 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1552 NEIGH_UPDATE_F_OVERRIDE|
1553 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1554 NEIGH_UPDATE_F_ISROUTER))
1558 * Redirect received -> path was valid.
1559 * Look, redirects are sent only in response to data packets,
1560 * so that this nexthop apparently is reachable. --ANK
1562 dst_confirm(&rt->dst);
1564 /* Duplicate redirect: silently ignore. */
1565 if (neigh == rt->dst.neighbour)
1566 goto out;
1568 nrt = ip6_rt_copy(rt);
1569 if (nrt == NULL)
1570 goto out;
1572 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1573 if (on_link)
1574 nrt->rt6i_flags &= ~RTF_GATEWAY;
1576 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1577 nrt->rt6i_dst.plen = 128;
1578 nrt->dst.flags |= DST_HOST;
1580 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1581 nrt->rt6i_nexthop = neigh_clone(neigh);
1583 if (ip6_ins_rt(nrt))
1584 goto out;
1586 netevent.old = &rt->dst;
1587 netevent.new = &nrt->dst;
1588 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1590 if (rt->rt6i_flags&RTF_CACHE) {
1591 ip6_del_rt(rt);
1592 return;
1595 out:
1596 dst_release(&rt->dst);
1600 * Handle ICMP "packet too big" messages
1601 * i.e. Path MTU discovery
1604 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1605 struct net *net, u32 pmtu, int ifindex)
1607 struct rt6_info *rt, *nrt;
1608 int allfrag = 0;
1609 again:
1610 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1611 if (rt == NULL)
1612 return;
1614 if (rt6_check_expired(rt)) {
1615 ip6_del_rt(rt);
1616 goto again;
1619 if (pmtu >= dst_mtu(&rt->dst))
1620 goto out;
1622 if (pmtu < IPV6_MIN_MTU) {
1624 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1625 * MTU (1280) and a fragment header should always be included
1626 * after a node receiving Too Big message reporting PMTU is
1627 * less than the IPv6 Minimum Link MTU.
1629 pmtu = IPV6_MIN_MTU;
1630 allfrag = 1;
1633 /* New mtu received -> path was valid.
1634 They are sent only in response to data packets,
1635 so that this nexthop apparently is reachable. --ANK
1637 dst_confirm(&rt->dst);
1639 /* Host route. If it is static, it would be better
1640 not to override it, but add new one, so that
1641 when cache entry will expire old pmtu
1642 would return automatically.
1644 if (rt->rt6i_flags & RTF_CACHE) {
1645 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1646 if (allfrag) {
1647 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1648 features |= RTAX_FEATURE_ALLFRAG;
1649 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1651 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1652 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1653 goto out;
1656 /* Network route.
1657 Two cases are possible:
1658 1. It is connected route. Action: COW
1659 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1661 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1662 nrt = rt6_alloc_cow(rt, daddr, saddr);
1663 else
1664 nrt = rt6_alloc_clone(rt, daddr);
1666 if (nrt) {
1667 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1668 if (allfrag) {
1669 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1670 features |= RTAX_FEATURE_ALLFRAG;
1671 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1674 /* According to RFC 1981, detecting PMTU increase shouldn't be
1675 * happened within 5 mins, the recommended timer is 10 mins.
1676 * Here this route expiration time is set to ip6_rt_mtu_expires
1677 * which is 10 mins. After 10 mins the decreased pmtu is expired
1678 * and detecting PMTU increase will be automatically happened.
1680 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1681 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1683 ip6_ins_rt(nrt);
1685 out:
1686 dst_release(&rt->dst);
1689 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1690 struct net_device *dev, u32 pmtu)
1692 struct net *net = dev_net(dev);
1695 * RFC 1981 states that a node "MUST reduce the size of the packets it
1696 * is sending along the path" that caused the Packet Too Big message.
1697 * Since it's not possible in the general case to determine which
1698 * interface was used to send the original packet, we update the MTU
1699 * on the interface that will be used to send future packets. We also
1700 * update the MTU on the interface that received the Packet Too Big in
1701 * case the original packet was forced out that interface with
1702 * SO_BINDTODEVICE or similar. This is the next best thing to the
1703 * correct behaviour, which would be to update the MTU on all
1704 * interfaces.
1706 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1707 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
/*
 *	Misc support functions
 */
1714 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1716 struct net *net = dev_net(ort->rt6i_dev);
1717 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1719 if (rt) {
1720 rt->dst.input = ort->dst.input;
1721 rt->dst.output = ort->dst.output;
1723 dst_copy_metrics(&rt->dst, &ort->dst);
1724 rt->dst.error = ort->dst.error;
1725 rt->dst.dev = ort->dst.dev;
1726 if (rt->dst.dev)
1727 dev_hold(rt->dst.dev);
1728 rt->rt6i_idev = ort->rt6i_idev;
1729 if (rt->rt6i_idev)
1730 in6_dev_hold(rt->rt6i_idev);
1731 rt->dst.lastuse = jiffies;
1732 rt->rt6i_expires = 0;
1734 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1735 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1736 rt->rt6i_metric = 0;
1738 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1739 #ifdef CONFIG_IPV6_SUBTREES
1740 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1741 #endif
1742 rt->rt6i_table = ort->rt6i_table;
1744 return rt;
1747 #ifdef CONFIG_IPV6_ROUTE_INFO
1748 static struct rt6_info *rt6_get_route_info(struct net *net,
1749 struct in6_addr *prefix, int prefixlen,
1750 struct in6_addr *gwaddr, int ifindex)
1752 struct fib6_node *fn;
1753 struct rt6_info *rt = NULL;
1754 struct fib6_table *table;
1756 table = fib6_get_table(net, RT6_TABLE_INFO);
1757 if (table == NULL)
1758 return NULL;
1760 write_lock_bh(&table->tb6_lock);
1761 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1762 if (!fn)
1763 goto out;
1765 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1766 if (rt->rt6i_dev->ifindex != ifindex)
1767 continue;
1768 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1769 continue;
1770 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1771 continue;
1772 dst_hold(&rt->dst);
1773 break;
1775 out:
1776 write_unlock_bh(&table->tb6_lock);
1777 return rt;
1780 static struct rt6_info *rt6_add_route_info(struct net *net,
1781 struct in6_addr *prefix, int prefixlen,
1782 struct in6_addr *gwaddr, int ifindex,
1783 unsigned pref)
1785 struct fib6_config cfg = {
1786 .fc_table = RT6_TABLE_INFO,
1787 .fc_metric = IP6_RT_PRIO_USER,
1788 .fc_ifindex = ifindex,
1789 .fc_dst_len = prefixlen,
1790 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1791 RTF_UP | RTF_PREF(pref),
1792 .fc_nlinfo.pid = 0,
1793 .fc_nlinfo.nlh = NULL,
1794 .fc_nlinfo.nl_net = net,
1797 ipv6_addr_copy(&cfg.fc_dst, prefix);
1798 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1800 /* We should treat it as a default route if prefix length is 0. */
1801 if (!prefixlen)
1802 cfg.fc_flags |= RTF_DEFAULT;
1804 ip6_route_add(&cfg);
1806 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1808 #endif
1810 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1812 struct rt6_info *rt;
1813 struct fib6_table *table;
1815 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1816 if (table == NULL)
1817 return NULL;
1819 write_lock_bh(&table->tb6_lock);
1820 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1821 if (dev == rt->rt6i_dev &&
1822 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1823 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1824 break;
1826 if (rt)
1827 dst_hold(&rt->dst);
1828 write_unlock_bh(&table->tb6_lock);
1829 return rt;
1832 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1833 struct net_device *dev,
1834 unsigned int pref)
1836 struct fib6_config cfg = {
1837 .fc_table = RT6_TABLE_DFLT,
1838 .fc_metric = IP6_RT_PRIO_USER,
1839 .fc_ifindex = dev->ifindex,
1840 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1841 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1842 .fc_nlinfo.pid = 0,
1843 .fc_nlinfo.nlh = NULL,
1844 .fc_nlinfo.nl_net = dev_net(dev),
1847 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1849 ip6_route_add(&cfg);
1851 return rt6_get_dflt_router(gwaddr, dev);
1854 void rt6_purge_dflt_routers(struct net *net)
1856 struct rt6_info *rt;
1857 struct fib6_table *table;
1859 /* NOTE: Keep consistent with rt6_get_dflt_router */
1860 table = fib6_get_table(net, RT6_TABLE_DFLT);
1861 if (table == NULL)
1862 return;
1864 restart:
1865 read_lock_bh(&table->tb6_lock);
1866 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1867 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1868 dst_hold(&rt->dst);
1869 read_unlock_bh(&table->tb6_lock);
1870 ip6_del_rt(rt);
1871 goto restart;
1874 read_unlock_bh(&table->tb6_lock);
1877 static void rtmsg_to_fib6_config(struct net *net,
1878 struct in6_rtmsg *rtmsg,
1879 struct fib6_config *cfg)
1881 memset(cfg, 0, sizeof(*cfg));
1883 cfg->fc_table = RT6_TABLE_MAIN;
1884 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1885 cfg->fc_metric = rtmsg->rtmsg_metric;
1886 cfg->fc_expires = rtmsg->rtmsg_info;
1887 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1888 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1889 cfg->fc_flags = rtmsg->rtmsg_flags;
1891 cfg->fc_nlinfo.nl_net = net;
1893 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1894 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1895 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1898 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1900 struct fib6_config cfg;
1901 struct in6_rtmsg rtmsg;
1902 int err;
1904 switch(cmd) {
1905 case SIOCADDRT: /* Add a route */
1906 case SIOCDELRT: /* Delete a route */
1907 if (!capable(CAP_NET_ADMIN))
1908 return -EPERM;
1909 err = copy_from_user(&rtmsg, arg,
1910 sizeof(struct in6_rtmsg));
1911 if (err)
1912 return -EFAULT;
1914 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1916 rtnl_lock();
1917 switch (cmd) {
1918 case SIOCADDRT:
1919 err = ip6_route_add(&cfg);
1920 break;
1921 case SIOCDELRT:
1922 err = ip6_route_del(&cfg);
1923 break;
1924 default:
1925 err = -EINVAL;
1927 rtnl_unlock();
1929 return err;
1932 return -EINVAL;
1936 * Drop the packet on the floor
1939 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1941 int type;
1942 struct dst_entry *dst = skb_dst(skb);
1943 switch (ipstats_mib_noroutes) {
1944 case IPSTATS_MIB_INNOROUTES:
1945 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1946 if (type == IPV6_ADDR_ANY) {
1947 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1948 IPSTATS_MIB_INADDRERRORS);
1949 break;
1951 /* FALLTHROUGH */
1952 case IPSTATS_MIB_OUTNOROUTES:
1953 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1954 ipstats_mib_noroutes);
1955 break;
1957 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1958 kfree_skb(skb);
1959 return 0;
1962 static int ip6_pkt_discard(struct sk_buff *skb)
1964 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1967 static int ip6_pkt_discard_out(struct sk_buff *skb)
1969 skb->dev = skb_dst(skb)->dev;
1970 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1973 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1975 static int ip6_pkt_prohibit(struct sk_buff *skb)
1977 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1980 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1982 skb->dev = skb_dst(skb)->dev;
1983 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1986 #endif
1989 * Allocate a dst for local (unicast / anycast) address.
1992 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1993 const struct in6_addr *addr,
1994 int anycast)
1996 struct net *net = dev_net(idev->dev);
1997 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1998 struct neighbour *neigh;
2000 if (rt == NULL) {
2001 if (net_ratelimit())
2002 pr_warning("IPv6: Maximum number of routes reached,"
2003 " consider increasing route/max_size.\n");
2004 return ERR_PTR(-ENOMEM);
2007 dev_hold(net->loopback_dev);
2008 in6_dev_hold(idev);
2010 rt->dst.flags = DST_HOST;
2011 rt->dst.input = ip6_input;
2012 rt->dst.output = ip6_output;
2013 rt->rt6i_dev = net->loopback_dev;
2014 rt->rt6i_idev = idev;
2015 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
2016 rt->dst.obsolete = -1;
2018 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2019 if (anycast)
2020 rt->rt6i_flags |= RTF_ANYCAST;
2021 else
2022 rt->rt6i_flags |= RTF_LOCAL;
2023 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2024 if (IS_ERR(neigh)) {
2025 dst_free(&rt->dst);
2027 return ERR_CAST(neigh);
2029 rt->rt6i_nexthop = neigh;
2031 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2032 rt->rt6i_dst.plen = 128;
2033 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2035 atomic_set(&rt->dst.__refcnt, 1);
2037 return rt;
/* Argument bundle handed to fib6_ifdown() through the tree walkers. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be spared */
};
2045 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2047 const struct arg_dev_net *adn = arg;
2048 const struct net_device *dev = adn->dev;
2050 if ((rt->rt6i_dev == dev || dev == NULL) &&
2051 rt != adn->net->ipv6.ip6_null_entry) {
2052 RT6_TRACE("deleted by ifdown %p\n", rt);
2053 return -1;
2055 return 0;
2058 void rt6_ifdown(struct net *net, struct net_device *dev)
2060 struct arg_dev_net adn = {
2061 .dev = dev,
2062 .net = net,
2065 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2066 icmp6_clean_all(fib6_ifdown, &adn);
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
2075 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2077 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2078 struct inet6_dev *idev;
2080 /* In IPv6 pmtu discovery is not optional,
2081 so that RTAX_MTU lock cannot disable it.
2082 We still use this lock to block changes
2083 caused by addrconf/ndisc.
2086 idev = __in6_dev_get(arg->dev);
2087 if (idev == NULL)
2088 return 0;
2090 /* For administrative MTU increase, there is no way to discover
2091 IPv6 PMTU increase, so PMTU increase should be updated here.
2092 Since RFC 1981 doesn't include administrative MTU increase
2093 update PMTU increase is a MUST. (i.e. jumbo frame)
2096 If new MTU is less than route PMTU, this new MTU will be the
2097 lowest MTU in the path, update the route PMTU to reflect PMTU
2098 decreases; if new MTU is greater than route PMTU, and the
2099 old MTU is the lowest MTU in the path, update the route PMTU
2100 to reflect the increase. In this case if the other nodes' MTU
2101 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2102 PMTU discouvery.
2104 if (rt->rt6i_dev == arg->dev &&
2105 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2106 (dst_mtu(&rt->dst) >= arg->mtu ||
2107 (dst_mtu(&rt->dst) < arg->mtu &&
2108 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2109 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2111 return 0;
2114 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2116 struct rt6_mtu_change_arg arg = {
2117 .dev = dev,
2118 .mtu = mtu,
2121 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2124 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2125 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2126 [RTA_OIF] = { .type = NLA_U32 },
2127 [RTA_IIF] = { .type = NLA_U32 },
2128 [RTA_PRIORITY] = { .type = NLA_U32 },
2129 [RTA_METRICS] = { .type = NLA_NESTED },
2132 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2133 struct fib6_config *cfg)
2135 struct rtmsg *rtm;
2136 struct nlattr *tb[RTA_MAX+1];
2137 int err;
2139 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2140 if (err < 0)
2141 goto errout;
2143 err = -EINVAL;
2144 rtm = nlmsg_data(nlh);
2145 memset(cfg, 0, sizeof(*cfg));
2147 cfg->fc_table = rtm->rtm_table;
2148 cfg->fc_dst_len = rtm->rtm_dst_len;
2149 cfg->fc_src_len = rtm->rtm_src_len;
2150 cfg->fc_flags = RTF_UP;
2151 cfg->fc_protocol = rtm->rtm_protocol;
2153 if (rtm->rtm_type == RTN_UNREACHABLE)
2154 cfg->fc_flags |= RTF_REJECT;
2156 if (rtm->rtm_type == RTN_LOCAL)
2157 cfg->fc_flags |= RTF_LOCAL;
2159 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2160 cfg->fc_nlinfo.nlh = nlh;
2161 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2163 if (tb[RTA_GATEWAY]) {
2164 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2165 cfg->fc_flags |= RTF_GATEWAY;
2168 if (tb[RTA_DST]) {
2169 int plen = (rtm->rtm_dst_len + 7) >> 3;
2171 if (nla_len(tb[RTA_DST]) < plen)
2172 goto errout;
2174 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2177 if (tb[RTA_SRC]) {
2178 int plen = (rtm->rtm_src_len + 7) >> 3;
2180 if (nla_len(tb[RTA_SRC]) < plen)
2181 goto errout;
2183 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2186 if (tb[RTA_OIF])
2187 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2189 if (tb[RTA_PRIORITY])
2190 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2192 if (tb[RTA_METRICS]) {
2193 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2194 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2197 if (tb[RTA_TABLE])
2198 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2200 err = 0;
2201 errout:
2202 return err;
2205 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2207 struct fib6_config cfg;
2208 int err;
2210 err = rtm_to_fib6_config(skb, nlh, &cfg);
2211 if (err < 0)
2212 return err;
2214 return ip6_route_del(&cfg);
2217 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2219 struct fib6_config cfg;
2220 int err;
2222 err = rtm_to_fib6_config(skb, nlh, &cfg);
2223 if (err < 0)
2224 return err;
2226 return ip6_route_add(&cfg);
2229 static inline size_t rt6_nlmsg_size(void)
2231 return NLMSG_ALIGN(sizeof(struct rtmsg))
2232 + nla_total_size(16) /* RTA_SRC */
2233 + nla_total_size(16) /* RTA_DST */
2234 + nla_total_size(16) /* RTA_GATEWAY */
2235 + nla_total_size(16) /* RTA_PREFSRC */
2236 + nla_total_size(4) /* RTA_TABLE */
2237 + nla_total_size(4) /* RTA_IIF */
2238 + nla_total_size(4) /* RTA_OIF */
2239 + nla_total_size(4) /* RTA_PRIORITY */
2240 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2241 + nla_total_size(sizeof(struct rta_cacheinfo));
2244 static int rt6_fill_node(struct net *net,
2245 struct sk_buff *skb, struct rt6_info *rt,
2246 struct in6_addr *dst, struct in6_addr *src,
2247 int iif, int type, u32 pid, u32 seq,
2248 int prefix, int nowait, unsigned int flags)
2250 struct rtmsg *rtm;
2251 struct nlmsghdr *nlh;
2252 long expires;
2253 u32 table;
2255 if (prefix) { /* user wants prefix routes only */
2256 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2257 /* success since this is not a prefix route */
2258 return 1;
2262 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2263 if (nlh == NULL)
2264 return -EMSGSIZE;
2266 rtm = nlmsg_data(nlh);
2267 rtm->rtm_family = AF_INET6;
2268 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2269 rtm->rtm_src_len = rt->rt6i_src.plen;
2270 rtm->rtm_tos = 0;
2271 if (rt->rt6i_table)
2272 table = rt->rt6i_table->tb6_id;
2273 else
2274 table = RT6_TABLE_UNSPEC;
2275 rtm->rtm_table = table;
2276 NLA_PUT_U32(skb, RTA_TABLE, table);
2277 if (rt->rt6i_flags&RTF_REJECT)
2278 rtm->rtm_type = RTN_UNREACHABLE;
2279 else if (rt->rt6i_flags&RTF_LOCAL)
2280 rtm->rtm_type = RTN_LOCAL;
2281 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2282 rtm->rtm_type = RTN_LOCAL;
2283 else
2284 rtm->rtm_type = RTN_UNICAST;
2285 rtm->rtm_flags = 0;
2286 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2287 rtm->rtm_protocol = rt->rt6i_protocol;
2288 if (rt->rt6i_flags&RTF_DYNAMIC)
2289 rtm->rtm_protocol = RTPROT_REDIRECT;
2290 else if (rt->rt6i_flags & RTF_ADDRCONF)
2291 rtm->rtm_protocol = RTPROT_KERNEL;
2292 else if (rt->rt6i_flags&RTF_DEFAULT)
2293 rtm->rtm_protocol = RTPROT_RA;
2295 if (rt->rt6i_flags&RTF_CACHE)
2296 rtm->rtm_flags |= RTM_F_CLONED;
2298 if (dst) {
2299 NLA_PUT(skb, RTA_DST, 16, dst);
2300 rtm->rtm_dst_len = 128;
2301 } else if (rtm->rtm_dst_len)
2302 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2303 #ifdef CONFIG_IPV6_SUBTREES
2304 if (src) {
2305 NLA_PUT(skb, RTA_SRC, 16, src);
2306 rtm->rtm_src_len = 128;
2307 } else if (rtm->rtm_src_len)
2308 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2309 #endif
2310 if (iif) {
2311 #ifdef CONFIG_IPV6_MROUTE
2312 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2313 int err = ip6mr_get_route(net, skb, rtm, nowait);
2314 if (err <= 0) {
2315 if (!nowait) {
2316 if (err == 0)
2317 return 0;
2318 goto nla_put_failure;
2319 } else {
2320 if (err == -EMSGSIZE)
2321 goto nla_put_failure;
2324 } else
2325 #endif
2326 NLA_PUT_U32(skb, RTA_IIF, iif);
2327 } else if (dst) {
2328 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2329 struct in6_addr saddr_buf;
2330 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2331 dst, 0, &saddr_buf) == 0)
2332 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2335 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2336 goto nla_put_failure;
2338 if (rt->dst.neighbour)
2339 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2341 if (rt->dst.dev)
2342 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2344 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2346 if (!(rt->rt6i_flags & RTF_EXPIRES))
2347 expires = 0;
2348 else if (rt->rt6i_expires - jiffies < INT_MAX)
2349 expires = rt->rt6i_expires - jiffies;
2350 else
2351 expires = INT_MAX;
2353 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2354 expires, rt->dst.error) < 0)
2355 goto nla_put_failure;
2357 return nlmsg_end(skb, nlh);
2359 nla_put_failure:
2360 nlmsg_cancel(skb, nlh);
2361 return -EMSGSIZE;
2364 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2366 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2367 int prefix;
2369 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2370 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2371 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2372 } else
2373 prefix = 0;
2375 return rt6_fill_node(arg->net,
2376 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2377 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2378 prefix, 0, NLM_F_MULTI);
2381 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2383 struct net *net = sock_net(in_skb->sk);
2384 struct nlattr *tb[RTA_MAX+1];
2385 struct rt6_info *rt;
2386 struct sk_buff *skb;
2387 struct rtmsg *rtm;
2388 struct flowi6 fl6;
2389 int err, iif = 0;
2391 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2392 if (err < 0)
2393 goto errout;
2395 err = -EINVAL;
2396 memset(&fl6, 0, sizeof(fl6));
2398 if (tb[RTA_SRC]) {
2399 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2400 goto errout;
2402 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2405 if (tb[RTA_DST]) {
2406 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2407 goto errout;
2409 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2412 if (tb[RTA_IIF])
2413 iif = nla_get_u32(tb[RTA_IIF]);
2415 if (tb[RTA_OIF])
2416 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2418 if (iif) {
2419 struct net_device *dev;
2420 dev = __dev_get_by_index(net, iif);
2421 if (!dev) {
2422 err = -ENODEV;
2423 goto errout;
2427 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2428 if (skb == NULL) {
2429 err = -ENOBUFS;
2430 goto errout;
2433 /* Reserve room for dummy headers, this skb can pass
2434 through good chunk of routing engine.
2436 skb_reset_mac_header(skb);
2437 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2439 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2440 skb_dst_set(skb, &rt->dst);
2442 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2443 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2444 nlh->nlmsg_seq, 0, 0, 0);
2445 if (err < 0) {
2446 kfree_skb(skb);
2447 goto errout;
2450 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2451 errout:
2452 return err;
2455 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2457 struct sk_buff *skb;
2458 struct net *net = info->nl_net;
2459 u32 seq;
2460 int err;
2462 err = -ENOBUFS;
2463 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2465 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2466 if (skb == NULL)
2467 goto errout;
2469 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2470 event, info->pid, seq, 0, 0, 0);
2471 if (err < 0) {
2472 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2473 WARN_ON(err == -EMSGSIZE);
2474 kfree_skb(skb);
2475 goto errout;
2477 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2478 info->nlh, gfp_any());
2479 return;
2480 errout:
2481 if (err < 0)
2482 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2485 static int ip6_route_dev_notify(struct notifier_block *this,
2486 unsigned long event, void *data)
2488 struct net_device *dev = (struct net_device *)data;
2489 struct net *net = dev_net(dev);
2491 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2492 net->ipv6.ip6_null_entry->dst.dev = dev;
2493 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2494 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2495 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2496 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2497 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2498 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2499 #endif
2502 return NOTIFY_OK;
/*
 *	/proc
 */
2509 #ifdef CONFIG_PROC_FS
/*
 * Buffer-position bookkeeping for /proc output.
 * NOTE(review): nothing in this part of the file uses this structure;
 * the field meanings are not established here.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2520 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2522 struct seq_file *m = p_arg;
2524 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2526 #ifdef CONFIG_IPV6_SUBTREES
2527 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2528 #else
2529 seq_puts(m, "00000000000000000000000000000000 00 ");
2530 #endif
2532 if (rt->rt6i_nexthop) {
2533 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2534 } else {
2535 seq_puts(m, "00000000000000000000000000000000");
2537 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2538 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2539 rt->dst.__use, rt->rt6i_flags,
2540 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2541 return 0;
2544 static int ipv6_route_show(struct seq_file *m, void *v)
2546 struct net *net = (struct net *)m->private;
2547 fib6_clean_all(net, rt6_info_route, 0, m);
2548 return 0;
2551 static int ipv6_route_open(struct inode *inode, struct file *file)
2553 return single_open_net(inode, file, ipv6_route_show);
2556 static const struct file_operations ipv6_route_proc_fops = {
2557 .owner = THIS_MODULE,
2558 .open = ipv6_route_open,
2559 .read = seq_read,
2560 .llseek = seq_lseek,
2561 .release = single_release_net,
2564 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2566 struct net *net = (struct net *)seq->private;
2567 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2568 net->ipv6.rt6_stats->fib_nodes,
2569 net->ipv6.rt6_stats->fib_route_nodes,
2570 net->ipv6.rt6_stats->fib_rt_alloc,
2571 net->ipv6.rt6_stats->fib_rt_entries,
2572 net->ipv6.rt6_stats->fib_rt_cache,
2573 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2574 net->ipv6.rt6_stats->fib_discarded_routes);
2576 return 0;
2579 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2581 return single_open_net(inode, file, rt6_stats_seq_show);
2584 static const struct file_operations rt6_stats_seq_fops = {
2585 .owner = THIS_MODULE,
2586 .open = rt6_stats_seq_open,
2587 .read = seq_read,
2588 .llseek = seq_lseek,
2589 .release = single_release_net,
2591 #endif /* CONFIG_PROC_FS */
2593 #ifdef CONFIG_SYSCTL
2595 static
2596 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2597 void __user *buffer, size_t *lenp, loff_t *ppos)
2599 struct net *net;
2600 int delay;
2601 if (!write)
2602 return -EINVAL;
2604 net = (struct net *)ctl->extra1;
2605 delay = net->ipv6.sysctl.flush_delay;
2606 proc_dointvec(ctl, write, buffer, lenp, ppos);
2607 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2608 return 0;
2611 ctl_table ipv6_route_table_template[] = {
2613 .procname = "flush",
2614 .data = &init_net.ipv6.sysctl.flush_delay,
2615 .maxlen = sizeof(int),
2616 .mode = 0200,
2617 .proc_handler = ipv6_sysctl_rtcache_flush
2620 .procname = "gc_thresh",
2621 .data = &ip6_dst_ops_template.gc_thresh,
2622 .maxlen = sizeof(int),
2623 .mode = 0644,
2624 .proc_handler = proc_dointvec,
2627 .procname = "max_size",
2628 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2629 .maxlen = sizeof(int),
2630 .mode = 0644,
2631 .proc_handler = proc_dointvec,
2634 .procname = "gc_min_interval",
2635 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2636 .maxlen = sizeof(int),
2637 .mode = 0644,
2638 .proc_handler = proc_dointvec_jiffies,
2641 .procname = "gc_timeout",
2642 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2643 .maxlen = sizeof(int),
2644 .mode = 0644,
2645 .proc_handler = proc_dointvec_jiffies,
2648 .procname = "gc_interval",
2649 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2650 .maxlen = sizeof(int),
2651 .mode = 0644,
2652 .proc_handler = proc_dointvec_jiffies,
2655 .procname = "gc_elasticity",
2656 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2657 .maxlen = sizeof(int),
2658 .mode = 0644,
2659 .proc_handler = proc_dointvec,
2662 .procname = "mtu_expires",
2663 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2664 .maxlen = sizeof(int),
2665 .mode = 0644,
2666 .proc_handler = proc_dointvec_jiffies,
2669 .procname = "min_adv_mss",
2670 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2671 .maxlen = sizeof(int),
2672 .mode = 0644,
2673 .proc_handler = proc_dointvec,
2676 .procname = "gc_min_interval_ms",
2677 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2678 .maxlen = sizeof(int),
2679 .mode = 0644,
2680 .proc_handler = proc_dointvec_ms_jiffies,
2685 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2687 struct ctl_table *table;
2689 table = kmemdup(ipv6_route_table_template,
2690 sizeof(ipv6_route_table_template),
2691 GFP_KERNEL);
2693 if (table) {
2694 table[0].data = &net->ipv6.sysctl.flush_delay;
2695 table[0].extra1 = net;
2696 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2697 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2698 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2699 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2700 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2701 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2702 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2703 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2704 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2707 return table;
2709 #endif
2711 static int __net_init ip6_route_net_init(struct net *net)
2713 int ret = -ENOMEM;
2715 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2716 sizeof(net->ipv6.ip6_dst_ops));
2718 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2719 goto out_ip6_dst_ops;
2721 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2722 sizeof(*net->ipv6.ip6_null_entry),
2723 GFP_KERNEL);
2724 if (!net->ipv6.ip6_null_entry)
2725 goto out_ip6_dst_entries;
2726 net->ipv6.ip6_null_entry->dst.path =
2727 (struct dst_entry *)net->ipv6.ip6_null_entry;
2728 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2729 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2730 ip6_template_metrics, true);
2732 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2733 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2734 sizeof(*net->ipv6.ip6_prohibit_entry),
2735 GFP_KERNEL);
2736 if (!net->ipv6.ip6_prohibit_entry)
2737 goto out_ip6_null_entry;
2738 net->ipv6.ip6_prohibit_entry->dst.path =
2739 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2740 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2741 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2742 ip6_template_metrics, true);
2744 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2745 sizeof(*net->ipv6.ip6_blk_hole_entry),
2746 GFP_KERNEL);
2747 if (!net->ipv6.ip6_blk_hole_entry)
2748 goto out_ip6_prohibit_entry;
2749 net->ipv6.ip6_blk_hole_entry->dst.path =
2750 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2751 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2752 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2753 ip6_template_metrics, true);
2754 #endif
2756 net->ipv6.sysctl.flush_delay = 0;
2757 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2758 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2759 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2760 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2761 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2762 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2763 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2765 #ifdef CONFIG_PROC_FS
2766 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2767 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2768 #endif
2769 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2771 ret = 0;
2772 out:
2773 return ret;
2775 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2776 out_ip6_prohibit_entry:
2777 kfree(net->ipv6.ip6_prohibit_entry);
2778 out_ip6_null_entry:
2779 kfree(net->ipv6.ip6_null_entry);
2780 #endif
2781 out_ip6_dst_entries:
2782 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2783 out_ip6_dst_ops:
2784 goto out;
2787 static void __net_exit ip6_route_net_exit(struct net *net)
2789 #ifdef CONFIG_PROC_FS
2790 proc_net_remove(net, "ipv6_route");
2791 proc_net_remove(net, "rt6_stats");
2792 #endif
2793 kfree(net->ipv6.ip6_null_entry);
2794 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2795 kfree(net->ipv6.ip6_prohibit_entry);
2796 kfree(net->ipv6.ip6_blk_hole_entry);
2797 #endif
2798 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2801 static struct pernet_operations ip6_route_net_ops = {
2802 .init = ip6_route_net_init,
2803 .exit = ip6_route_net_exit,
2806 static struct notifier_block ip6_route_dev_notifier = {
2807 .notifier_call = ip6_route_dev_notify,
2808 .priority = 0,
2811 int __init ip6_route_init(void)
2813 int ret;
2815 ret = -ENOMEM;
2816 ip6_dst_ops_template.kmem_cachep =
2817 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2818 SLAB_HWCACHE_ALIGN, NULL);
2819 if (!ip6_dst_ops_template.kmem_cachep)
2820 goto out;
2822 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2823 if (ret)
2824 goto out_kmem_cache;
2826 ret = register_pernet_subsys(&ip6_route_net_ops);
2827 if (ret)
2828 goto out_dst_entries;
2830 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2832 /* Registering of the loopback is done before this portion of code,
2833 * the loopback reference in rt6_info will not be taken, do it
2834 * manually for init_net */
2835 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2836 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2837 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2838 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2839 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2840 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2841 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2842 #endif
2843 ret = fib6_init();
2844 if (ret)
2845 goto out_register_subsys;
2847 ret = xfrm6_init();
2848 if (ret)
2849 goto out_fib6_init;
2851 ret = fib6_rules_init();
2852 if (ret)
2853 goto xfrm6_init;
2855 ret = -ENOBUFS;
2856 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2857 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2858 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2859 goto fib6_rules_init;
2861 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2862 if (ret)
2863 goto fib6_rules_init;
2865 out:
2866 return ret;
2868 fib6_rules_init:
2869 fib6_rules_cleanup();
2870 xfrm6_init:
2871 xfrm6_fini();
2872 out_fib6_init:
2873 fib6_gc_cleanup();
2874 out_register_subsys:
2875 unregister_pernet_subsys(&ip6_route_net_ops);
2876 out_dst_entries:
2877 dst_entries_destroy(&ip6_dst_blackhole_ops);
2878 out_kmem_cache:
2879 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2880 goto out;
2883 void ip6_route_cleanup(void)
2885 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2886 fib6_rules_cleanup();
2887 xfrm6_fini();
2888 fib6_gc_cleanup();
2889 unregister_pernet_subsys(&ip6_route_net_ops);
2890 dst_entries_destroy(&ip6_dst_blackhole_ops);
2891 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);