[PATCH] oom-kill: update comments to reflect current code
[pv_ops_mirror.git] / net / ipv6 / route.c
blobd6b4b4f48d18cb6d9736c520b854d0899c98bc23
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 /* Changes:
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 * Ville Nuorvala
26 * Fixed routing subtrees.
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
42 #ifdef CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57 #include <net/netevent.h>
58 #include <net/netlink.h>
60 #include <asm/uaccess.h>
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
/*
 * Debug switches for this file.  When RT6_DEBUG is 3 or more, RDBG() and
 * RT6_TRACE() expand to printk() calls; otherwise both expand to nothing.
 */
66 /* Set to 3 to get tracing. */
67 #define RT6_DEBUG 2
69 #if RT6_DEBUG >= 3
70 #define RDBG(x) printk x
71 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #else
73 #define RDBG(x)
74 #define RT6_TRACE(x...) do { ; } while (0)
75 #endif
/*
 * Compile-time switch used by the policy-route lookup functions: when
 * non-zero, routes that are not copied by rt6_alloc_cow() are cloned
 * with rt6_alloc_clone(); when zero they are returned as-is.
 */
77 #define CLONE_OFFLINK_ROUTE 0
/* Routing cache tunables (time values are expressed in jiffies via HZ). */
/* ip6_dst_gc() reports pressure while the entry count exceeds this. */
79 static int ip6_rt_max_size = 4096;
/* ip6_dst_gc() skips a run if the previous one was more recent than this
 * (unless the entry count is above ip6_rt_max_size). */
80 static int ip6_rt_gc_min_interval = HZ / 2;
/* Half of this value is the reset value of the GC expiry in ip6_dst_gc(). */
81 static int ip6_rt_gc_timeout = 60*HZ;
/* Non-static; not referenced in the visible part of this file. */
82 int ip6_rt_gc_interval = 30*HZ;
/* Shift count used by ip6_dst_gc() to decay its expiry value on each call. */
83 static int ip6_rt_gc_elasticity = 9;
/* Not referenced in the visible part of this file. */
84 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/*
 * Forward declarations for file-local helpers and for the callbacks that
 * are installed in ip6_dst_ops and in the static route entries below.
 */
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void ip6_dst_destroy(struct dst_entry *);
91 static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93 static int ip6_dst_gc(void);
95 static int ip6_pkt_discard(struct sk_buff *skb);
96 static int ip6_pkt_discard_out(struct sk_buff *skb);
97 static void ip6_link_failure(struct sk_buff *skb);
98 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* Route Information option support (used by rt6_route_rcv()). */
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106 #endif
/*
 * dst_ops table for IPv6 routes.  It wires the generic destination-cache
 * hooks to the ip6_* callbacks defined in this file and sizes each cache
 * entry as a struct rt6_info.
 */
108 static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
/*
 * Statically allocated "no route" entry.  Lookup helpers in this file
 * return it when nothing matches.  It is bound to the loopback device,
 * discards packets in both directions and carries -ENETUNREACH.
 */
122 struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
142 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/*
 * Static reject entry that differs from ip6_null_entry only in its error
 * code (-EACCES) and its self-referencing .path pointer.
 * NOTE(review): presumably the result for "prohibit" policy rules; the
 * users are not visible in this part of the file.
 */
144 struct rt6_info ip6_prohibit_entry = {
145 .u = {
146 .dst = {
147 .__refcnt = ATOMIC_INIT(1),
148 .__use = 1,
149 .dev = &loopback_dev,
150 .obsolete = -1,
151 .error = -EACCES,
152 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
153 .input = ip6_pkt_discard,
154 .output = ip6_pkt_discard_out,
155 .ops = &ip6_dst_ops,
156 .path = (struct dst_entry*)&ip6_prohibit_entry,
159 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
160 .rt6i_metric = ~(u32) 0,
161 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Static reject entry that differs from ip6_null_entry only in its error
 * code (-EINVAL) and its self-referencing .path pointer.
 * NOTE(review): presumably the result for "blackhole" policy rules; the
 * users are not visible in this part of the file.
 */
164 struct rt6_info ip6_blk_hole_entry = {
165 .u = {
166 .dst = {
167 .__refcnt = ATOMIC_INIT(1),
168 .__use = 1,
169 .dev = &loopback_dev,
170 .obsolete = -1,
171 .error = -EINVAL,
172 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
173 .input = ip6_pkt_discard,
174 .output = ip6_pkt_discard_out,
175 .ops = &ip6_dst_ops,
176 .path = (struct dst_entry*)&ip6_blk_hole_entry,
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_metric = ~(u32) 0,
181 .rt6i_ref = ATOMIC_INIT(1),
184 #endif
186 /* allocate dst with ip6_dst_ops */
187 static __inline__ struct rt6_info *ip6_dst_alloc(void)
189 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
192 static void ip6_dst_destroy(struct dst_entry *dst)
194 struct rt6_info *rt = (struct rt6_info *)dst;
195 struct inet6_dev *idev = rt->rt6i_idev;
197 if (idev != NULL) {
198 rt->rt6i_idev = NULL;
199 in6_dev_put(idev);
203 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204 int how)
206 struct rt6_info *rt = (struct rt6_info *)dst;
207 struct inet6_dev *idev = rt->rt6i_idev;
209 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211 if (loopback_idev != NULL) {
212 rt->rt6i_idev = loopback_idev;
213 in6_dev_put(idev);
218 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220 return (rt->rt6i_flags & RTF_EXPIRES &&
221 time_after(jiffies, rt->rt6i_expires));
224 static inline int rt6_need_strict(struct in6_addr *daddr)
226 return (ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
231 * Route lookup. Any table->tb6_lock is implied.
234 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235 int oif,
236 int strict)
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
241 if (oif) {
242 for (sprt = rt; sprt; sprt = sprt->u.next) {
243 struct net_device *dev = sprt->rt6i_dev;
244 if (dev->ifindex == oif)
245 return sprt;
246 if (dev->flags & IFF_LOOPBACK) {
247 if (sprt->rt6i_idev == NULL ||
248 sprt->rt6i_idev->dev->ifindex != oif) {
249 if (strict && oif)
250 continue;
251 if (local && (!oif ||
252 local->rt6i_idev->dev->ifindex == oif))
253 continue;
255 local = sprt;
259 if (local)
260 return local;
262 if (strict)
263 return &ip6_null_entry;
265 return rt;
/*
 * rt6_probe - router reachability probing.
 *
 * With CONFIG_IPV6_ROUTER_PREF: if @rt has a nexthop neighbour that is not
 * in a NUD_VALID state and the neighbour's 'updated' stamp is older than
 * the interface's rtr_probe_interval, refresh the stamp and send a
 * Neighbour Solicitation to the solicited-node multicast address of the
 * neighbour.  A NULL @rt is accepted and ignored.
 * Without CONFIG_IPV6_ROUTER_PREF this is an empty stub.
 */
268 #ifdef CONFIG_IPV6_ROUTER_PREF
269 static void rt6_probe(struct rt6_info *rt)
271 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
273 * Okay, this does not seem to be appropriate
274 * for now, however, we need to check if it
275 * is really so; aka Router Reachability Probing.
277 * Router Reachability Probe MUST be rate-limited
278 * to no more than one per minute.
/* Cheap unlocked pre-check; the state is re-tested under the lock below. */
280 if (!neigh || (neigh->nud_state & NUD_VALID))
281 return;
282 read_lock_bh(&neigh->lock);
283 if (!(neigh->nud_state & NUD_VALID) &&
284 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
285 struct in6_addr mcaddr;
286 struct in6_addr *target;
/* Stamp first so the rate limit applies, then drop the lock before sending. */
288 neigh->updated = jiffies;
289 read_unlock_bh(&neigh->lock);
291 target = (struct in6_addr *)&neigh->primary_key;
292 addrconf_addr_solict_mult(target, &mcaddr);
293 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
294 } else
295 read_unlock_bh(&neigh->lock);
297 #else
298 static inline void rt6_probe(struct rt6_info *rt)
300 return;
302 #endif
305 * Default Router Selection (RFC 2461 6.3.6)
307 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309 struct net_device *dev = rt->rt6i_dev;
310 if (!oif || dev->ifindex == oif)
311 return 2;
312 if ((dev->flags & IFF_LOOPBACK) &&
313 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314 return 1;
315 return 0;
318 static int inline rt6_check_neigh(struct rt6_info *rt)
320 struct neighbour *neigh = rt->rt6i_nexthop;
321 int m = 0;
322 if (rt->rt6i_flags & RTF_NONEXTHOP ||
323 !(rt->rt6i_flags & RTF_GATEWAY))
324 m = 1;
325 else if (neigh) {
326 read_lock_bh(&neigh->lock);
327 if (neigh->nud_state & NUD_VALID)
328 m = 2;
329 read_unlock_bh(&neigh->lock);
331 return m;
334 static int rt6_score_route(struct rt6_info *rt, int oif,
335 int strict)
337 int m, n;
339 m = rt6_check_dev(rt, oif);
340 if (!m && (strict & RT6_LOOKUP_F_IFACE))
341 return -1;
342 #ifdef CONFIG_IPV6_ROUTER_PREF
343 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344 #endif
345 n = rt6_check_neigh(rt);
346 if (n > 1)
347 m |= 16;
348 else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
349 return -1;
350 return m;
/*
 * rt6_select - choose the best route among the entries at *@head that
 * share the head entry's metric.
 *
 * Expired entries are skipped.  Each remaining entry is scored with
 * rt6_score_route(); the highest score wins, and entries that lose are
 * handed to rt6_probe().  If nothing matched, RT6_LOOKUP_F_REACHABLE was
 * requested and more than one candidate was seen, the head entry is moved
 * behind the last candidate so that the next lookup starts at a different
 * entry (round-robin).
 *
 * Returns the selected route, or &ip6_null_entry if none qualified.
 */
353 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354 int strict)
356 struct rt6_info *match = NULL, *last = NULL;
357 struct rt6_info *rt, *rt0 = *head;
358 u32 metric;
359 int mpri = -1;
361 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362 __FUNCTION__, head, head ? *head : NULL, oif);
/* Walk only the run of entries whose metric equals that of the head. */
364 for (rt = rt0, metric = rt0->rt6i_metric;
365 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
366 rt = rt->u.next) {
367 int m;
369 if (rt6_check_expired(rt))
370 continue;
/* 'last' tracks the final non-expired candidate (used for the rotation). */
372 last = rt;
374 m = rt6_score_route(rt, oif, strict);
375 if (m < 0)
376 continue;
378 if (m > mpri) {
379 rt6_probe(match);
380 match = rt;
381 mpri = m;
382 } else {
383 rt6_probe(rt);
387 if (!match &&
388 (strict & RT6_LOOKUP_F_REACHABLE) &&
389 last && last != rt0) {
390 /* no entries matched; do round-robin */
391 static DEFINE_SPINLOCK(lock);
392 spin_lock(&lock);
/* Unlink rt0 from the front and re-insert it right after 'last'. */
393 *head = rt0->u.next;
394 rt0->u.next = last->u.next;
395 last->u.next = rt0;
396 spin_unlock(&lock);
399 RT6_TRACE("%s() => %p, score=%d\n",
400 __FUNCTION__, match, mpri);
402 return (match ? match : &ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a Route Information option.
 * @dev:    device the option was received on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the matching route-info route.  A lifetime of 0xffffffff means
 * "never expires"; other lifetimes are clamped so that HZ * lifetime does
 * not overflow.
 *
 * Returns 0 on success (including when no route ends up installed) and
 * -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/*
	 * The lifetime arrives in network byte order, so the conversion
	 * to host order is ntohl(), not htonl().
	 */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference held on rt by this function. */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
/*
 * BACKTRACK(saddr) - continue a failed lookup further up the FIB tree.
 *
 * Expands in a function that has local variables 'rt' and 'fn' and labels
 * 'restart' and 'out'.  When rt is &ip6_null_entry it climbs from fn
 * towards the root: at each step it moves to the parent, or, when the
 * parent has a source subtree other than the node we came from, looks
 * @saddr up in that subtree.  It jumps to 'restart' at the first node
 * flagged RTN_RTINFO and to 'out' when a node flagged RTN_TL_ROOT is
 * reached.
 *
 * The subtree is read through FIB6_SUBTREE() in both places so the macro
 * does not depend on the 'subtree' member being present.
 * NOTE(review): assumes FIB6_SUBTREE(pn) is the accessor for pn->subtree
 * (its definition lives outside this file) - confirm.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
/*
 * ip6_pol_route_lookup - per-table lookup callback used by rt6_lookup().
 *
 * Under the table read lock, finds the FIB node for the flow, picks the
 * route matching fl->oif with rt6_device_match() and backtracks up the
 * tree (BACKTRACK uses the 'restart' and 'out' labels) while the result is
 * &ip6_null_entry.  The returned route has its reference count raised and
 * its lastuse/__use counters updated.
 */
501 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 struct flowi *fl, int flags)
504 struct fib6_node *fn;
505 struct rt6_info *rt;
507 read_lock_bh(&table->tb6_lock);
508 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509 restart:
510 rt = fn->leaf;
511 rt = rt6_device_match(rt, fl->oif, flags);
512 BACKTRACK(&fl->fl6_src);
513 out:
/* Take the reference before the lock is dropped. */
514 dst_hold(&rt->u.dst);
515 read_unlock_bh(&table->tb6_lock);
517 rt->u.dst.lastuse = jiffies;
518 rt->u.dst.__use++;
520 return rt;
524 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 int oif, int strict)
527 struct flowi fl = {
528 .oif = oif,
529 .nl_u = {
530 .ip6_u = {
531 .daddr = *daddr,
532 /* TODO: saddr */
536 struct dst_entry *dst;
537 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
539 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
540 if (dst->error == 0)
541 return (struct rt6_info *) dst;
543 dst_release(dst);
545 return NULL;
/*
 * ip6_ins_rt() is called without table->tb6_lock held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is freed.  In any case, if the caller does not hold a reference
 * to the route, it may be destroyed.
 */
554 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
556 int err;
557 struct fib6_table *table;
559 table = rt->rt6i_table;
560 write_lock_bh(&table->tb6_lock);
561 err = fib6_add(&table->tb6_root, rt, info);
562 write_unlock_bh(&table->tb6_lock);
564 return err;
567 int ip6_ins_rt(struct rt6_info *rt)
569 return __ip6_ins_rt(rt, NULL);
572 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
573 struct in6_addr *saddr)
575 struct rt6_info *rt;
578 * Clone the route.
581 rt = ip6_rt_copy(ort);
583 if (rt) {
584 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
585 if (rt->rt6i_dst.plen != 128 &&
586 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
587 rt->rt6i_flags |= RTF_ANYCAST;
588 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
591 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
592 rt->rt6i_dst.plen = 128;
593 rt->rt6i_flags |= RTF_CACHE;
594 rt->u.dst.flags |= DST_HOST;
596 #ifdef CONFIG_IPV6_SUBTREES
597 if (rt->rt6i_src.plen && saddr) {
598 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
599 rt->rt6i_src.plen = 128;
601 #endif
603 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
607 return rt;
610 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
612 struct rt6_info *rt = ip6_rt_copy(ort);
613 if (rt) {
614 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
615 rt->rt6i_dst.plen = 128;
616 rt->rt6i_flags |= RTF_CACHE;
617 if (rt->rt6i_flags & RTF_REJECT)
618 rt->u.dst.error = ort->u.dst.error;
619 rt->u.dst.flags |= DST_HOST;
620 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
622 return rt;
/*
 * ip6_pol_route_input - per-table lookup callback for received packets
 * (selection is keyed on fl->iif).
 *
 * The first pass asks rt6_select() for reachable routers only; if that
 * yields &ip6_null_entry or a cached route, the lookup is repeated once
 * without RT6_LOOKUP_F_REACHABLE (label 'out').  A non-cached result with
 * no nexthop (and without RTF_NONEXTHOP) is copied with rt6_alloc_cow()
 * and inserted into the table.  If the insertion fails the whole lookup is
 * retried, at most 'attempts' times in total.
 *
 * Returns a route with a reference held and lastuse/__use updated.
 */
625 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
626 struct flowi *fl, int flags)
628 struct fib6_node *fn;
629 struct rt6_info *rt, *nrt;
630 int strict = 0;
631 int attempts = 3;
632 int err;
633 int reachable = RT6_LOOKUP_F_REACHABLE;
635 strict |= flags & RT6_LOOKUP_F_IFACE;
637 relookup:
638 read_lock_bh(&table->tb6_lock);
640 restart_2:
641 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
643 restart:
644 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
645 BACKTRACK(&fl->fl6_src);
646 if (rt == &ip6_null_entry ||
647 rt->rt6i_flags & RTF_CACHE)
648 goto out;
/* Pin the route, then leave the lock: the allocation below runs unlocked. */
650 dst_hold(&rt->u.dst);
651 read_unlock_bh(&table->tb6_lock);
653 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
654 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
655 else {
656 #if CLONE_OFFLINK_ROUTE
657 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
658 #else
659 goto out2;
660 #endif
/* Swap the reference from the original route to the copy (or null entry). */
663 dst_release(&rt->u.dst);
664 rt = nrt ? : &ip6_null_entry;
666 dst_hold(&rt->u.dst);
667 if (nrt) {
668 err = ip6_ins_rt(nrt);
669 if (!err)
670 goto out2;
673 if (--attempts <= 0)
674 goto out2;
677 * Race condition! In the gap, when table->tb6_lock was
678 * released someone could insert this route. Relookup.
680 dst_release(&rt->u.dst);
681 goto relookup;
683 out:
/* First failure with the reachability filter: retry once without it. */
684 if (reachable) {
685 reachable = 0;
686 goto restart_2;
688 dst_hold(&rt->u.dst);
689 read_unlock_bh(&table->tb6_lock);
690 out2:
691 rt->u.dst.lastuse = jiffies;
692 rt->u.dst.__use++;
694 return rt;
697 void ip6_route_input(struct sk_buff *skb)
699 struct ipv6hdr *iph = skb->nh.ipv6h;
700 struct flowi fl = {
701 .iif = skb->dev->ifindex,
702 .nl_u = {
703 .ip6_u = {
704 .daddr = iph->daddr,
705 .saddr = iph->saddr,
706 #ifdef CONFIG_IPV6_ROUTE_FWMARK
707 .fwmark = skb->nfmark,
708 #endif
709 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
712 .proto = iph->nexthdr,
714 int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
716 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
/*
 * ip6_pol_route_output - per-table lookup callback for locally generated
 * packets (selection is keyed on fl->oif).
 *
 * The first pass asks rt6_select() for reachable routers only; if that
 * yields &ip6_null_entry or a cached route, the lookup is repeated once
 * without RT6_LOOKUP_F_REACHABLE (label 'out').  A non-cached result with
 * no nexthop (and without RTF_NONEXTHOP) is copied with rt6_alloc_cow()
 * and inserted into the table.  If the insertion fails the whole lookup is
 * retried, at most 'attempts' times in total.
 *
 * Returns a route with a reference held and lastuse/__use updated.
 */
719 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
720 struct flowi *fl, int flags)
722 struct fib6_node *fn;
723 struct rt6_info *rt, *nrt;
724 int strict = 0;
725 int attempts = 3;
726 int err;
727 int reachable = RT6_LOOKUP_F_REACHABLE;
729 strict |= flags & RT6_LOOKUP_F_IFACE;
731 relookup:
732 read_lock_bh(&table->tb6_lock);
734 restart_2:
735 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
737 restart:
738 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
739 BACKTRACK(&fl->fl6_src);
740 if (rt == &ip6_null_entry ||
741 rt->rt6i_flags & RTF_CACHE)
742 goto out;
/* Pin the route, then leave the lock: the allocation below runs unlocked. */
744 dst_hold(&rt->u.dst);
745 read_unlock_bh(&table->tb6_lock);
747 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
748 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
749 else {
750 #if CLONE_OFFLINK_ROUTE
751 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
752 #else
753 goto out2;
754 #endif
/* Swap the reference from the original route to the copy (or null entry). */
757 dst_release(&rt->u.dst);
758 rt = nrt ? : &ip6_null_entry;
760 dst_hold(&rt->u.dst);
761 if (nrt) {
762 err = ip6_ins_rt(nrt);
763 if (!err)
764 goto out2;
767 if (--attempts <= 0)
768 goto out2;
771 * Race condition! In the gap, when table->tb6_lock was
772 * released someone could insert this route. Relookup.
774 dst_release(&rt->u.dst);
775 goto relookup;
777 out:
/* First failure with the reachability filter: retry once without it. */
778 if (reachable) {
779 reachable = 0;
780 goto restart_2;
782 dst_hold(&rt->u.dst);
783 read_unlock_bh(&table->tb6_lock);
784 out2:
785 rt->u.dst.lastuse = jiffies;
786 rt->u.dst.__use++;
787 return rt;
790 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
792 int flags = 0;
794 if (rt6_need_strict(&fl->fl6_dst))
795 flags |= RT6_LOOKUP_F_IFACE;
797 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
802 * Destination cache support functions
805 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
807 struct rt6_info *rt;
809 rt = (struct rt6_info *) dst;
811 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
812 return dst;
814 return NULL;
817 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
819 struct rt6_info *rt = (struct rt6_info *) dst;
821 if (rt) {
822 if (rt->rt6i_flags & RTF_CACHE)
823 ip6_del_rt(rt);
824 else
825 dst_release(dst);
827 return NULL;
830 static void ip6_link_failure(struct sk_buff *skb)
832 struct rt6_info *rt;
834 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
836 rt = (struct rt6_info *) skb->dst;
837 if (rt) {
838 if (rt->rt6i_flags&RTF_CACHE) {
839 dst_set_expires(&rt->u.dst, 0);
840 rt->rt6i_flags |= RTF_EXPIRES;
841 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
842 rt->rt6i_node->fn_sernum = -1;
846 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
848 struct rt6_info *rt6 = (struct rt6_info*)dst;
850 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
851 rt6->rt6i_flags |= RTF_MODIFIED;
852 if (mtu < IPV6_MIN_MTU) {
853 mtu = IPV6_MIN_MTU;
854 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
856 dst->metrics[RTAX_MTU-1] = mtu;
857 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
861 static int ipv6_get_mtu(struct net_device *dev);
863 static inline unsigned int ipv6_advmss(unsigned int mtu)
865 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
867 if (mtu < ip6_rt_min_advmss)
868 mtu = ip6_rt_min_advmss;
871 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
872 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
873 * IPV6_MAXPLEN is also valid and means: "any MSS,
874 * rely only on pmtu discovery"
876 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
877 mtu = IPV6_MAXPLEN;
878 return mtu;
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the ones whose reference count
 * has dropped to zero.  ndisc_lock protects the list.
 */
881 static struct dst_entry *ndisc_dst_gc_list;
882 static DEFINE_SPINLOCK(ndisc_lock);
884 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
885 struct neighbour *neigh,
886 struct in6_addr *addr,
887 int (*output)(struct sk_buff *))
889 struct rt6_info *rt;
890 struct inet6_dev *idev = in6_dev_get(dev);
892 if (unlikely(idev == NULL))
893 return NULL;
895 rt = ip6_dst_alloc();
896 if (unlikely(rt == NULL)) {
897 in6_dev_put(idev);
898 goto out;
901 dev_hold(dev);
902 if (neigh)
903 neigh_hold(neigh);
904 else
905 neigh = ndisc_get_neigh(dev, addr);
907 rt->rt6i_dev = dev;
908 rt->rt6i_idev = idev;
909 rt->rt6i_nexthop = neigh;
910 atomic_set(&rt->u.dst.__refcnt, 1);
911 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
912 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
913 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
914 rt->u.dst.output = output;
916 #if 0 /* there's no chance to use these for ndisc */
917 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
918 ? DST_HOST
919 : 0;
920 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
921 rt->rt6i_dst.plen = 128;
922 #endif
924 spin_lock_bh(&ndisc_lock);
925 rt->u.dst.next = ndisc_dst_gc_list;
926 ndisc_dst_gc_list = &rt->u.dst;
927 spin_unlock_bh(&ndisc_lock);
929 fib6_force_start_gc();
931 out:
932 return (struct dst_entry *)rt;
935 int ndisc_dst_gc(int *more)
937 struct dst_entry *dst, *next, **pprev;
938 int freed;
940 next = NULL;
941 freed = 0;
943 spin_lock_bh(&ndisc_lock);
944 pprev = &ndisc_dst_gc_list;
946 while ((dst = *pprev) != NULL) {
947 if (!atomic_read(&dst->__refcnt)) {
948 *pprev = dst->next;
949 dst_free(dst);
950 freed++;
951 } else {
952 pprev = &dst->next;
953 (*more)++;
957 spin_unlock_bh(&ndisc_lock);
959 return freed;
962 static int ip6_dst_gc(void)
964 static unsigned expire = 30*HZ;
965 static unsigned long last_gc;
966 unsigned long now = jiffies;
968 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
969 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
970 goto out;
972 expire++;
973 fib6_run_gc(expire);
974 last_gc = now;
975 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
976 expire = ip6_rt_gc_timeout>>1;
978 out:
979 expire -= expire>>ip6_rt_gc_elasticity;
980 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
983 /* Clean host part of a prefix. Not necessary in radix tree,
984 but results in cleaner routing tables.
986 Remove it only when all the things will work!
989 static int ipv6_get_mtu(struct net_device *dev)
991 int mtu = IPV6_MIN_MTU;
992 struct inet6_dev *idev;
994 idev = in6_dev_get(dev);
995 if (idev) {
996 mtu = idev->cnf.mtu6;
997 in6_dev_put(idev);
999 return mtu;
1002 int ipv6_get_hoplimit(struct net_device *dev)
1004 int hoplimit = ipv6_devconf.hop_limit;
1005 struct inet6_dev *idev;
1007 idev = in6_dev_get(dev);
1008 if (idev) {
1009 hoplimit = idev->cnf.hop_limit;
1010 in6_dev_put(idev);
1012 return hoplimit;
/*
 * ip6_route_add - create a route from @cfg and insert it into its table.
 *
 * Validates the prefix lengths, resolves the output device (taking
 * references on the net_device and its inet6_dev), allocates the route and
 * fills it in.  Reject routes, and routes via a loopback device to a
 * non-loopback destination, are turned into discard routes bound to the
 * loopback device.  For gateway routes the gateway is checked (a
 * non-link-local gateway must itself be reachable through a non-gateway
 * route on the same device) and a neighbour entry is looked up.  Metrics
 * supplied in cfg->fc_mx are copied; hop limit, MTU and advertised MSS get
 * defaults when unset.
 *
 * Note that cfg->fc_metric and cfg->fc_protocol are modified in place when
 * they are left unspecified.
 *
 * Returns the result of __ip6_ins_rt() on the success path, or a negative
 * errno.  On the error path the device references are dropped and the
 * partially built route is freed.
 */
1019 int ip6_route_add(struct fib6_config *cfg)
1021 int err;
1022 struct rt6_info *rt = NULL;
1023 struct net_device *dev = NULL;
1024 struct inet6_dev *idev = NULL;
1025 struct fib6_table *table;
1026 int addr_type;
1028 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1029 return -EINVAL;
1030 #ifndef CONFIG_IPV6_SUBTREES
1031 if (cfg->fc_src_len)
1032 return -EINVAL;
1033 #endif
1034 if (cfg->fc_ifindex) {
1035 err = -ENODEV;
1036 dev = dev_get_by_index(cfg->fc_ifindex);
1037 if (!dev)
1038 goto out;
1039 idev = in6_dev_get(dev);
1040 if (!idev)
1041 goto out;
1044 if (cfg->fc_metric == 0)
1045 cfg->fc_metric = IP6_RT_PRIO_USER;
1047 table = fib6_new_table(cfg->fc_table);
1048 if (table == NULL) {
1049 err = -ENOBUFS;
1050 goto out;
1053 rt = ip6_dst_alloc();
1055 if (rt == NULL) {
1056 err = -ENOMEM;
1057 goto out;
1060 rt->u.dst.obsolete = -1;
1061 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1063 if (cfg->fc_protocol == RTPROT_UNSPEC)
1064 cfg->fc_protocol = RTPROT_BOOT;
1065 rt->rt6i_protocol = cfg->fc_protocol;
1067 addr_type = ipv6_addr_type(&cfg->fc_dst);
1069 if (addr_type & IPV6_ADDR_MULTICAST)
1070 rt->u.dst.input = ip6_mc_input;
1071 else
1072 rt->u.dst.input = ip6_forward;
1074 rt->u.dst.output = ip6_output;
1076 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1077 rt->rt6i_dst.plen = cfg->fc_dst_len;
1078 if (rt->rt6i_dst.plen == 128)
1079 rt->u.dst.flags = DST_HOST;
1081 #ifdef CONFIG_IPV6_SUBTREES
1082 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1083 rt->rt6i_src.plen = cfg->fc_src_len;
1084 #endif
1086 rt->rt6i_metric = cfg->fc_metric;
1088 /* We cannot add true routes via loopback here,
1089 they would result in kernel looping; promote them to reject routes
1091 if ((cfg->fc_flags & RTF_REJECT) ||
1092 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1093 /* hold loopback dev/idev if we haven't done so. */
1094 if (dev != &loopback_dev) {
1095 if (dev) {
1096 dev_put(dev);
1097 in6_dev_put(idev);
1099 dev = &loopback_dev;
1100 dev_hold(dev);
1101 idev = in6_dev_get(dev);
1102 if (!idev) {
1103 err = -ENODEV;
1104 goto out;
1107 rt->u.dst.output = ip6_pkt_discard_out;
1108 rt->u.dst.input = ip6_pkt_discard;
1109 rt->u.dst.error = -ENETUNREACH;
1110 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1111 goto install_route;
1114 if (cfg->fc_flags & RTF_GATEWAY) {
1115 struct in6_addr *gw_addr;
1116 int gwa_type;
1118 gw_addr = &cfg->fc_gateway;
1119 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1120 gwa_type = ipv6_addr_type(gw_addr);
1122 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1123 struct rt6_info *grt;
1125 /* IPv6 strictly inhibits using not link-local
1126 addresses as nexthop address.
1127 Otherwise, router will not able to send redirects.
1128 It is very good, but in some (rare!) circumstances
1129 (SIT, PtP, NBMA NOARP links) it is handy to allow
1130 some exceptions. --ANK
1132 err = -EINVAL;
1133 if (!(gwa_type&IPV6_ADDR_UNICAST))
1134 goto out;
1136 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1138 err = -EHOSTUNREACH;
1139 if (grt == NULL)
1140 goto out;
1141 if (dev) {
1142 if (dev != grt->rt6i_dev) {
1143 dst_release(&grt->u.dst);
1144 goto out;
1146 } else {
/* No device was given: adopt (and pin) the one the gateway route uses. */
1147 dev = grt->rt6i_dev;
1148 idev = grt->rt6i_idev;
1149 dev_hold(dev);
1150 in6_dev_hold(grt->rt6i_idev);
/* err stays -EHOSTUNREACH unless the gateway is reached without another gateway. */
1152 if (!(grt->rt6i_flags&RTF_GATEWAY))
1153 err = 0;
1154 dst_release(&grt->u.dst);
1156 if (err)
1157 goto out;
1159 err = -EINVAL;
1160 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1161 goto out;
1164 err = -ENODEV;
1165 if (dev == NULL)
1166 goto out;
1168 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1169 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1170 if (IS_ERR(rt->rt6i_nexthop)) {
1171 err = PTR_ERR(rt->rt6i_nexthop);
1172 rt->rt6i_nexthop = NULL;
1173 goto out;
1177 rt->rt6i_flags = cfg->fc_flags;
1179 install_route:
1180 if (cfg->fc_mx) {
1181 struct nlattr *nla;
1182 int remaining;
1184 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1185 int type = nla->nla_type;
1187 if (type) {
1188 if (type > RTAX_MAX) {
1189 err = -EINVAL;
1190 goto out;
1193 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
/* Fill in defaults for metrics the caller did not supply. */
1198 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1199 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1200 if (!rt->u.dst.metrics[RTAX_MTU-1])
1201 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1202 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1203 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
/* The dev/idev references taken above are handed over to the route here. */
1204 rt->u.dst.dev = dev;
1205 rt->rt6i_idev = idev;
1206 rt->rt6i_table = table;
1207 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1209 out:
1210 if (dev)
1211 dev_put(dev);
1212 if (idev)
1213 in6_dev_put(idev);
1214 if (rt)
1215 dst_free((struct dst_entry *) rt);
1216 return err;
1219 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1221 int err;
1222 struct fib6_table *table;
1224 if (rt == &ip6_null_entry)
1225 return -ENOENT;
1227 table = rt->rt6i_table;
1228 write_lock_bh(&table->tb6_lock);
1230 err = fib6_del(rt, info);
1231 dst_release(&rt->u.dst);
1233 write_unlock_bh(&table->tb6_lock);
1235 return err;
1238 int ip6_del_rt(struct rt6_info *rt)
1240 return __ip6_del_rt(rt, NULL);
1243 static int ip6_route_del(struct fib6_config *cfg)
1245 struct fib6_table *table;
1246 struct fib6_node *fn;
1247 struct rt6_info *rt;
1248 int err = -ESRCH;
1250 table = fib6_get_table(cfg->fc_table);
1251 if (table == NULL)
1252 return err;
1254 read_lock_bh(&table->tb6_lock);
1256 fn = fib6_locate(&table->tb6_root,
1257 &cfg->fc_dst, cfg->fc_dst_len,
1258 &cfg->fc_src, cfg->fc_src_len);
1260 if (fn) {
1261 for (rt = fn->leaf; rt; rt = rt->u.next) {
1262 if (cfg->fc_ifindex &&
1263 (rt->rt6i_dev == NULL ||
1264 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1265 continue;
1266 if (cfg->fc_flags & RTF_GATEWAY &&
1267 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1268 continue;
1269 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1270 continue;
1271 dst_hold(&rt->u.dst);
1272 read_unlock_bh(&table->tb6_lock);
1274 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1277 read_unlock_bh(&table->tb6_lock);
1279 return err;
1283 * Handle redirects
/*
 * Flow key extended with the address of the router that sent a redirect.
 * 'fl' must remain the first member: ip6_route_redirect() passes a pointer
 * to this structure as a struct flowi *, and __ip6_route_redirect() casts
 * it back to reach 'gateway'.
 */
1285 struct ip6rd_flowi {
1286 struct flowi fl;
1287 struct in6_addr gateway;
1290 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1291 struct flowi *fl,
1292 int flags)
1294 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1295 struct rt6_info *rt;
1296 struct fib6_node *fn;
1299 * Get the "current" route for this destination and
1300 * check if the redirect has come from approriate router.
1302 * RFC 2461 specifies that redirects should only be
1303 * accepted if they come from the nexthop to the target.
1304 * Due to the way the routes are chosen, this notion
1305 * is a bit fuzzy and one might need to check all possible
1306 * routes.
1309 read_lock_bh(&table->tb6_lock);
1310 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1311 restart:
1312 for (rt = fn->leaf; rt; rt = rt->u.next) {
1314 * Current route is on-link; redirect is always invalid.
1316 * Seems, previous statement is not true. It could
1317 * be node, which looks for us as on-link (f.e. proxy ndisc)
1318 * But then router serving it might decide, that we should
1319 * know truth 8)8) --ANK (980726).
1321 if (rt6_check_expired(rt))
1322 continue;
1323 if (!(rt->rt6i_flags & RTF_GATEWAY))
1324 continue;
1325 if (fl->oif != rt->rt6i_dev->ifindex)
1326 continue;
1327 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1328 continue;
1329 break;
1332 if (!rt)
1333 rt = &ip6_null_entry;
1334 BACKTRACK(&fl->fl6_src);
1335 out:
1336 dst_hold(&rt->u.dst);
1338 read_unlock_bh(&table->tb6_lock);
1340 return rt;
1343 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1344 struct in6_addr *src,
1345 struct in6_addr *gateway,
1346 struct net_device *dev)
1348 struct ip6rd_flowi rdfl = {
1349 .fl = {
1350 .oif = dev->ifindex,
1351 .nl_u = {
1352 .ip6_u = {
1353 .daddr = *dest,
1354 .saddr = *src,
1358 .gateway = *gateway,
1360 int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
1362 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1365 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1366 struct in6_addr *saddr,
1367 struct neighbour *neigh, u8 *lladdr, int on_link)
1369 struct rt6_info *rt, *nrt = NULL;
1370 struct netevent_redirect netevent;
1372 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1374 if (rt == &ip6_null_entry) {
1375 if (net_ratelimit())
1376 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1377 "for redirect target\n");
1378 goto out;
1382 * We have finally decided to accept it.
1385 neigh_update(neigh, lladdr, NUD_STALE,
1386 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1387 NEIGH_UPDATE_F_OVERRIDE|
1388 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1389 NEIGH_UPDATE_F_ISROUTER))
1393 * Redirect received -> path was valid.
1394 * Look, redirects are sent only in response to data packets,
1395 * so that this nexthop apparently is reachable. --ANK
1397 dst_confirm(&rt->u.dst);
1399 /* Duplicate redirect: silently ignore. */
1400 if (neigh == rt->u.dst.neighbour)
1401 goto out;
1403 nrt = ip6_rt_copy(rt);
1404 if (nrt == NULL)
1405 goto out;
1407 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1408 if (on_link)
1409 nrt->rt6i_flags &= ~RTF_GATEWAY;
1411 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1412 nrt->rt6i_dst.plen = 128;
1413 nrt->u.dst.flags |= DST_HOST;
1415 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1416 nrt->rt6i_nexthop = neigh_clone(neigh);
1417 /* Reset pmtu, it may be better */
1418 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1419 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1421 if (ip6_ins_rt(nrt))
1422 goto out;
1424 netevent.old = &rt->u.dst;
1425 netevent.new = &nrt->u.dst;
1426 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1428 if (rt->rt6i_flags&RTF_CACHE) {
1429 ip6_del_rt(rt);
1430 return;
1433 out:
1434 dst_release(&rt->u.dst);
1435 return;
1439 * Handle ICMP "packet too big" messages
1440 * i.e. Path MTU discovery
1443 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1444 struct net_device *dev, u32 pmtu)
1446 struct rt6_info *rt, *nrt;
1447 int allfrag = 0;
1449 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1450 if (rt == NULL)
1451 return;
1453 if (pmtu >= dst_mtu(&rt->u.dst))
1454 goto out;
1456 if (pmtu < IPV6_MIN_MTU) {
1458 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1459 * MTU (1280) and a fragment header should always be included
1460 * after a node receiving Too Big message reporting PMTU is
1461 * less than the IPv6 Minimum Link MTU.
1463 pmtu = IPV6_MIN_MTU;
1464 allfrag = 1;
1467 /* New mtu received -> path was valid.
1468 They are sent only in response to data packets,
1469 so that this nexthop apparently is reachable. --ANK
1471 dst_confirm(&rt->u.dst);
1473 /* Host route. If it is static, it would be better
1474 not to override it, but add new one, so that
1475 when cache entry will expire old pmtu
1476 would return automatically.
1478 if (rt->rt6i_flags & RTF_CACHE) {
1479 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1480 if (allfrag)
1481 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1482 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1483 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1484 goto out;
1487 /* Network route.
1488 Two cases are possible:
1489 1. It is connected route. Action: COW
1490 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1492 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1493 nrt = rt6_alloc_cow(rt, daddr, saddr);
1494 else
1495 nrt = rt6_alloc_clone(rt, daddr);
1497 if (nrt) {
1498 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1499 if (allfrag)
1500 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1502 /* According to RFC 1981, detecting PMTU increase shouldn't be
1503 * happened within 5 mins, the recommended timer is 10 mins.
1504 * Here this route expiration time is set to ip6_rt_mtu_expires
1505 * which is 10 mins. After 10 mins the decreased pmtu is expired
1506 * and detecting PMTU increase will be automatically happened.
1508 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1509 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1511 ip6_ins_rt(nrt);
1513 out:
1514 dst_release(&rt->u.dst);
1518 * Misc support functions
1521 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1523 struct rt6_info *rt = ip6_dst_alloc();
1525 if (rt) {
1526 rt->u.dst.input = ort->u.dst.input;
1527 rt->u.dst.output = ort->u.dst.output;
1529 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1530 rt->u.dst.dev = ort->u.dst.dev;
1531 if (rt->u.dst.dev)
1532 dev_hold(rt->u.dst.dev);
1533 rt->rt6i_idev = ort->rt6i_idev;
1534 if (rt->rt6i_idev)
1535 in6_dev_hold(rt->rt6i_idev);
1536 rt->u.dst.lastuse = jiffies;
1537 rt->rt6i_expires = 0;
1539 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1540 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1541 rt->rt6i_metric = 0;
1543 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1544 #ifdef CONFIG_IPV6_SUBTREES
1545 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1546 #endif
1547 rt->rt6i_table = ort->rt6i_table;
1549 return rt;
1552 #ifdef CONFIG_IPV6_ROUTE_INFO
1553 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1554 struct in6_addr *gwaddr, int ifindex)
1556 struct fib6_node *fn;
1557 struct rt6_info *rt = NULL;
1558 struct fib6_table *table;
1560 table = fib6_get_table(RT6_TABLE_INFO);
1561 if (table == NULL)
1562 return NULL;
1564 write_lock_bh(&table->tb6_lock);
1565 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1566 if (!fn)
1567 goto out;
1569 for (rt = fn->leaf; rt; rt = rt->u.next) {
1570 if (rt->rt6i_dev->ifindex != ifindex)
1571 continue;
1572 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1573 continue;
1574 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1575 continue;
1576 dst_hold(&rt->u.dst);
1577 break;
1579 out:
1580 write_unlock_bh(&table->tb6_lock);
1581 return rt;
1584 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1585 struct in6_addr *gwaddr, int ifindex,
1586 unsigned pref)
1588 struct fib6_config cfg = {
1589 .fc_table = RT6_TABLE_INFO,
1590 .fc_metric = 1024,
1591 .fc_ifindex = ifindex,
1592 .fc_dst_len = prefixlen,
1593 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1594 RTF_UP | RTF_PREF(pref),
1597 ipv6_addr_copy(&cfg.fc_dst, prefix);
1598 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1600 /* We should treat it as a default route if prefix length is 0. */
1601 if (!prefixlen)
1602 cfg.fc_flags |= RTF_DEFAULT;
1604 ip6_route_add(&cfg);
1606 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1608 #endif
1610 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1612 struct rt6_info *rt;
1613 struct fib6_table *table;
1615 table = fib6_get_table(RT6_TABLE_DFLT);
1616 if (table == NULL)
1617 return NULL;
1619 write_lock_bh(&table->tb6_lock);
1620 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1621 if (dev == rt->rt6i_dev &&
1622 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1623 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1624 break;
1626 if (rt)
1627 dst_hold(&rt->u.dst);
1628 write_unlock_bh(&table->tb6_lock);
1629 return rt;
1632 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1633 struct net_device *dev,
1634 unsigned int pref)
1636 struct fib6_config cfg = {
1637 .fc_table = RT6_TABLE_DFLT,
1638 .fc_metric = 1024,
1639 .fc_ifindex = dev->ifindex,
1640 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1641 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1644 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1646 ip6_route_add(&cfg);
1648 return rt6_get_dflt_router(gwaddr, dev);
1651 void rt6_purge_dflt_routers(void)
1653 struct rt6_info *rt;
1654 struct fib6_table *table;
1656 /* NOTE: Keep consistent with rt6_get_dflt_router */
1657 table = fib6_get_table(RT6_TABLE_DFLT);
1658 if (table == NULL)
1659 return;
1661 restart:
1662 read_lock_bh(&table->tb6_lock);
1663 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1664 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1665 dst_hold(&rt->u.dst);
1666 read_unlock_bh(&table->tb6_lock);
1667 ip6_del_rt(rt);
1668 goto restart;
1671 read_unlock_bh(&table->tb6_lock);
1674 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1675 struct fib6_config *cfg)
1677 memset(cfg, 0, sizeof(*cfg));
1679 cfg->fc_table = RT6_TABLE_MAIN;
1680 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1681 cfg->fc_metric = rtmsg->rtmsg_metric;
1682 cfg->fc_expires = rtmsg->rtmsg_info;
1683 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1684 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1685 cfg->fc_flags = rtmsg->rtmsg_flags;
1687 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1688 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1689 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1692 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1694 struct fib6_config cfg;
1695 struct in6_rtmsg rtmsg;
1696 int err;
1698 switch(cmd) {
1699 case SIOCADDRT: /* Add a route */
1700 case SIOCDELRT: /* Delete a route */
1701 if (!capable(CAP_NET_ADMIN))
1702 return -EPERM;
1703 err = copy_from_user(&rtmsg, arg,
1704 sizeof(struct in6_rtmsg));
1705 if (err)
1706 return -EFAULT;
1708 rtmsg_to_fib6_config(&rtmsg, &cfg);
1710 rtnl_lock();
1711 switch (cmd) {
1712 case SIOCADDRT:
1713 err = ip6_route_add(&cfg);
1714 break;
1715 case SIOCDELRT:
1716 err = ip6_route_del(&cfg);
1717 break;
1718 default:
1719 err = -EINVAL;
1721 rtnl_unlock();
1723 return err;
1726 return -EINVAL;
1730 * Drop the packet on the floor
1733 static int ip6_pkt_discard(struct sk_buff *skb)
1735 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1736 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1737 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1739 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1740 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1741 kfree_skb(skb);
1742 return 0;
1745 static int ip6_pkt_discard_out(struct sk_buff *skb)
1747 skb->dev = skb->dst->dev;
1748 return ip6_pkt_discard(skb);
1752 * Allocate a dst for local (unicast / anycast) address.
1755 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1756 const struct in6_addr *addr,
1757 int anycast)
1759 struct rt6_info *rt = ip6_dst_alloc();
1761 if (rt == NULL)
1762 return ERR_PTR(-ENOMEM);
1764 dev_hold(&loopback_dev);
1765 in6_dev_hold(idev);
1767 rt->u.dst.flags = DST_HOST;
1768 rt->u.dst.input = ip6_input;
1769 rt->u.dst.output = ip6_output;
1770 rt->rt6i_dev = &loopback_dev;
1771 rt->rt6i_idev = idev;
1772 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1773 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1774 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1775 rt->u.dst.obsolete = -1;
1777 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1778 if (anycast)
1779 rt->rt6i_flags |= RTF_ANYCAST;
1780 else
1781 rt->rt6i_flags |= RTF_LOCAL;
1782 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1783 if (rt->rt6i_nexthop == NULL) {
1784 dst_free((struct dst_entry *) rt);
1785 return ERR_PTR(-ENOMEM);
1788 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1789 rt->rt6i_dst.plen = 128;
1790 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1792 atomic_set(&rt->u.dst.__refcnt, 1);
1794 return rt;
1797 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1799 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1800 rt != &ip6_null_entry) {
1801 RT6_TRACE("deleted by ifdown %p\n", rt);
1802 return -1;
1804 return 0;
1807 void rt6_ifdown(struct net_device *dev)
1809 fib6_clean_all(fib6_ifdown, 0, dev);
/* Argument bundle passed from rt6_mtu_change() to its per-route callback. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1818 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1820 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1821 struct inet6_dev *idev;
1823 /* In IPv6 pmtu discovery is not optional,
1824 so that RTAX_MTU lock cannot disable it.
1825 We still use this lock to block changes
1826 caused by addrconf/ndisc.
1829 idev = __in6_dev_get(arg->dev);
1830 if (idev == NULL)
1831 return 0;
1833 /* For administrative MTU increase, there is no way to discover
1834 IPv6 PMTU increase, so PMTU increase should be updated here.
1835 Since RFC 1981 doesn't include administrative MTU increase
1836 update PMTU increase is a MUST. (i.e. jumbo frame)
1839 If new MTU is less than route PMTU, this new MTU will be the
1840 lowest MTU in the path, update the route PMTU to reflect PMTU
1841 decreases; if new MTU is greater than route PMTU, and the
1842 old MTU is the lowest MTU in the path, update the route PMTU
1843 to reflect the increase. In this case if the other nodes' MTU
1844 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1845 PMTU discouvery.
1847 if (rt->rt6i_dev == arg->dev &&
1848 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1849 (dst_mtu(&rt->u.dst) > arg->mtu ||
1850 (dst_mtu(&rt->u.dst) < arg->mtu &&
1851 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1852 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1853 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1854 return 0;
1857 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1859 struct rt6_mtu_change_arg arg = {
1860 .dev = dev,
1861 .mtu = mtu,
1864 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1867 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1868 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
1869 [RTA_OIF] = { .type = NLA_U32 },
1870 [RTA_IIF] = { .type = NLA_U32 },
1871 [RTA_PRIORITY] = { .type = NLA_U32 },
1872 [RTA_METRICS] = { .type = NLA_NESTED },
1875 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1876 struct fib6_config *cfg)
1878 struct rtmsg *rtm;
1879 struct nlattr *tb[RTA_MAX+1];
1880 int err;
1882 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1883 if (err < 0)
1884 goto errout;
1886 err = -EINVAL;
1887 rtm = nlmsg_data(nlh);
1888 memset(cfg, 0, sizeof(*cfg));
1890 cfg->fc_table = rtm->rtm_table;
1891 cfg->fc_dst_len = rtm->rtm_dst_len;
1892 cfg->fc_src_len = rtm->rtm_src_len;
1893 cfg->fc_flags = RTF_UP;
1894 cfg->fc_protocol = rtm->rtm_protocol;
1896 if (rtm->rtm_type == RTN_UNREACHABLE)
1897 cfg->fc_flags |= RTF_REJECT;
1899 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1900 cfg->fc_nlinfo.nlh = nlh;
1902 if (tb[RTA_GATEWAY]) {
1903 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1904 cfg->fc_flags |= RTF_GATEWAY;
1907 if (tb[RTA_DST]) {
1908 int plen = (rtm->rtm_dst_len + 7) >> 3;
1910 if (nla_len(tb[RTA_DST]) < plen)
1911 goto errout;
1913 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1916 if (tb[RTA_SRC]) {
1917 int plen = (rtm->rtm_src_len + 7) >> 3;
1919 if (nla_len(tb[RTA_SRC]) < plen)
1920 goto errout;
1922 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1925 if (tb[RTA_OIF])
1926 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1928 if (tb[RTA_PRIORITY])
1929 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1931 if (tb[RTA_METRICS]) {
1932 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1933 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1936 if (tb[RTA_TABLE])
1937 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1939 err = 0;
1940 errout:
1941 return err;
1944 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1946 struct fib6_config cfg;
1947 int err;
1949 err = rtm_to_fib6_config(skb, nlh, &cfg);
1950 if (err < 0)
1951 return err;
1953 return ip6_route_del(&cfg);
1956 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1958 struct fib6_config cfg;
1959 int err;
1961 err = rtm_to_fib6_config(skb, nlh, &cfg);
1962 if (err < 0)
1963 return err;
1965 return ip6_route_add(&cfg);
1968 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1969 struct in6_addr *dst, struct in6_addr *src,
1970 int iif, int type, u32 pid, u32 seq,
1971 int prefix, unsigned int flags)
1973 struct rtmsg *rtm;
1974 struct nlmsghdr *nlh;
1975 struct rta_cacheinfo ci;
1976 u32 table;
1978 if (prefix) { /* user wants prefix routes only */
1979 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1980 /* success since this is not a prefix route */
1981 return 1;
1985 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1986 if (nlh == NULL)
1987 return -ENOBUFS;
1989 rtm = nlmsg_data(nlh);
1990 rtm->rtm_family = AF_INET6;
1991 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1992 rtm->rtm_src_len = rt->rt6i_src.plen;
1993 rtm->rtm_tos = 0;
1994 if (rt->rt6i_table)
1995 table = rt->rt6i_table->tb6_id;
1996 else
1997 table = RT6_TABLE_UNSPEC;
1998 rtm->rtm_table = table;
1999 NLA_PUT_U32(skb, RTA_TABLE, table);
2000 if (rt->rt6i_flags&RTF_REJECT)
2001 rtm->rtm_type = RTN_UNREACHABLE;
2002 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2003 rtm->rtm_type = RTN_LOCAL;
2004 else
2005 rtm->rtm_type = RTN_UNICAST;
2006 rtm->rtm_flags = 0;
2007 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2008 rtm->rtm_protocol = rt->rt6i_protocol;
2009 if (rt->rt6i_flags&RTF_DYNAMIC)
2010 rtm->rtm_protocol = RTPROT_REDIRECT;
2011 else if (rt->rt6i_flags & RTF_ADDRCONF)
2012 rtm->rtm_protocol = RTPROT_KERNEL;
2013 else if (rt->rt6i_flags&RTF_DEFAULT)
2014 rtm->rtm_protocol = RTPROT_RA;
2016 if (rt->rt6i_flags&RTF_CACHE)
2017 rtm->rtm_flags |= RTM_F_CLONED;
2019 if (dst) {
2020 NLA_PUT(skb, RTA_DST, 16, dst);
2021 rtm->rtm_dst_len = 128;
2022 } else if (rtm->rtm_dst_len)
2023 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2024 #ifdef CONFIG_IPV6_SUBTREES
2025 if (src) {
2026 NLA_PUT(skb, RTA_SRC, 16, src);
2027 rtm->rtm_src_len = 128;
2028 } else if (rtm->rtm_src_len)
2029 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2030 #endif
2031 if (iif)
2032 NLA_PUT_U32(skb, RTA_IIF, iif);
2033 else if (dst) {
2034 struct in6_addr saddr_buf;
2035 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2036 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2039 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2040 goto nla_put_failure;
2042 if (rt->u.dst.neighbour)
2043 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2045 if (rt->u.dst.dev)
2046 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2048 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2049 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2050 if (rt->rt6i_expires)
2051 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2052 else
2053 ci.rta_expires = 0;
2054 ci.rta_used = rt->u.dst.__use;
2055 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2056 ci.rta_error = rt->u.dst.error;
2057 ci.rta_id = 0;
2058 ci.rta_ts = 0;
2059 ci.rta_tsage = 0;
2060 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2062 return nlmsg_end(skb, nlh);
2064 nla_put_failure:
2065 return nlmsg_cancel(skb, nlh);
2068 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2070 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2071 int prefix;
2073 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2074 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2075 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2076 } else
2077 prefix = 0;
2079 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2080 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2081 prefix, NLM_F_MULTI);
2084 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2086 struct nlattr *tb[RTA_MAX+1];
2087 struct rt6_info *rt;
2088 struct sk_buff *skb;
2089 struct rtmsg *rtm;
2090 struct flowi fl;
2091 int err, iif = 0;
2093 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2094 if (err < 0)
2095 goto errout;
2097 err = -EINVAL;
2098 memset(&fl, 0, sizeof(fl));
2100 if (tb[RTA_SRC]) {
2101 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2102 goto errout;
2104 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2107 if (tb[RTA_DST]) {
2108 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2109 goto errout;
2111 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2114 if (tb[RTA_IIF])
2115 iif = nla_get_u32(tb[RTA_IIF]);
2117 if (tb[RTA_OIF])
2118 fl.oif = nla_get_u32(tb[RTA_OIF]);
2120 if (iif) {
2121 struct net_device *dev;
2122 dev = __dev_get_by_index(iif);
2123 if (!dev) {
2124 err = -ENODEV;
2125 goto errout;
2129 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2130 if (skb == NULL) {
2131 err = -ENOBUFS;
2132 goto errout;
2135 /* Reserve room for dummy headers, this skb can pass
2136 through good chunk of routing engine.
2138 skb->mac.raw = skb->data;
2139 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2141 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2142 skb->dst = &rt->u.dst;
2144 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2145 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2146 nlh->nlmsg_seq, 0, 0);
2147 if (err < 0) {
2148 kfree_skb(skb);
2149 goto errout;
2152 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2153 errout:
2154 return err;
2157 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2159 struct sk_buff *skb;
2160 u32 pid = 0, seq = 0;
2161 struct nlmsghdr *nlh = NULL;
2162 int payload = sizeof(struct rtmsg) + 256;
2163 int err = -ENOBUFS;
2165 if (info) {
2166 pid = info->pid;
2167 nlh = info->nlh;
2168 if (nlh)
2169 seq = nlh->nlmsg_seq;
2172 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2173 if (skb == NULL)
2174 goto errout;
2176 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2177 if (err < 0) {
2178 kfree_skb(skb);
2179 goto errout;
2182 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2183 errout:
2184 if (err < 0)
2185 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2189 * /proc
2192 #ifdef CONFIG_PROC_FS
2194 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2196 struct rt6_proc_arg
2198 char *buffer;
2199 int offset;
2200 int length;
2201 int skip;
2202 int len;
2205 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2207 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2208 int i;
2210 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2211 arg->skip++;
2212 return 0;
2215 if (arg->len >= arg->length)
2216 return 0;
2218 for (i=0; i<16; i++) {
2219 sprintf(arg->buffer + arg->len, "%02x",
2220 rt->rt6i_dst.addr.s6_addr[i]);
2221 arg->len += 2;
2223 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2224 rt->rt6i_dst.plen);
2226 #ifdef CONFIG_IPV6_SUBTREES
2227 for (i=0; i<16; i++) {
2228 sprintf(arg->buffer + arg->len, "%02x",
2229 rt->rt6i_src.addr.s6_addr[i]);
2230 arg->len += 2;
2232 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2233 rt->rt6i_src.plen);
2234 #else
2235 sprintf(arg->buffer + arg->len,
2236 "00000000000000000000000000000000 00 ");
2237 arg->len += 36;
2238 #endif
2240 if (rt->rt6i_nexthop) {
2241 for (i=0; i<16; i++) {
2242 sprintf(arg->buffer + arg->len, "%02x",
2243 rt->rt6i_nexthop->primary_key[i]);
2244 arg->len += 2;
2246 } else {
2247 sprintf(arg->buffer + arg->len,
2248 "00000000000000000000000000000000");
2249 arg->len += 32;
2251 arg->len += sprintf(arg->buffer + arg->len,
2252 " %08x %08x %08x %08x %8s\n",
2253 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2254 rt->u.dst.__use, rt->rt6i_flags,
2255 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2256 return 0;
2259 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2261 struct rt6_proc_arg arg = {
2262 .buffer = buffer,
2263 .offset = offset,
2264 .length = length,
2267 fib6_clean_all(rt6_info_route, 0, &arg);
2269 *start = buffer;
2270 if (offset)
2271 *start += offset % RT6_INFO_LEN;
2273 arg.len -= offset % RT6_INFO_LEN;
2275 if (arg.len > length)
2276 arg.len = length;
2277 if (arg.len < 0)
2278 arg.len = 0;
2280 return arg.len;
2283 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2285 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2286 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2287 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2288 rt6_stats.fib_rt_cache,
2289 atomic_read(&ip6_dst_ops.entries),
2290 rt6_stats.fib_discarded_routes);
2292 return 0;
2295 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2297 return single_open(file, rt6_stats_seq_show, NULL);
2300 static struct file_operations rt6_stats_seq_fops = {
2301 .owner = THIS_MODULE,
2302 .open = rt6_stats_seq_open,
2303 .read = seq_read,
2304 .llseek = seq_lseek,
2305 .release = single_release,
2307 #endif /* CONFIG_PROC_FS */
2309 #ifdef CONFIG_SYSCTL
2311 static int flush_delay;
2313 static
2314 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2315 void __user *buffer, size_t *lenp, loff_t *ppos)
2317 if (write) {
2318 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2319 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2320 return 0;
2321 } else
2322 return -EINVAL;
2325 ctl_table ipv6_route_table[] = {
2327 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2328 .procname = "flush",
2329 .data = &flush_delay,
2330 .maxlen = sizeof(int),
2331 .mode = 0200,
2332 .proc_handler = &ipv6_sysctl_rtcache_flush
2335 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2336 .procname = "gc_thresh",
2337 .data = &ip6_dst_ops.gc_thresh,
2338 .maxlen = sizeof(int),
2339 .mode = 0644,
2340 .proc_handler = &proc_dointvec,
2343 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2344 .procname = "max_size",
2345 .data = &ip6_rt_max_size,
2346 .maxlen = sizeof(int),
2347 .mode = 0644,
2348 .proc_handler = &proc_dointvec,
2351 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2352 .procname = "gc_min_interval",
2353 .data = &ip6_rt_gc_min_interval,
2354 .maxlen = sizeof(int),
2355 .mode = 0644,
2356 .proc_handler = &proc_dointvec_jiffies,
2357 .strategy = &sysctl_jiffies,
2360 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2361 .procname = "gc_timeout",
2362 .data = &ip6_rt_gc_timeout,
2363 .maxlen = sizeof(int),
2364 .mode = 0644,
2365 .proc_handler = &proc_dointvec_jiffies,
2366 .strategy = &sysctl_jiffies,
2369 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2370 .procname = "gc_interval",
2371 .data = &ip6_rt_gc_interval,
2372 .maxlen = sizeof(int),
2373 .mode = 0644,
2374 .proc_handler = &proc_dointvec_jiffies,
2375 .strategy = &sysctl_jiffies,
2378 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2379 .procname = "gc_elasticity",
2380 .data = &ip6_rt_gc_elasticity,
2381 .maxlen = sizeof(int),
2382 .mode = 0644,
2383 .proc_handler = &proc_dointvec_jiffies,
2384 .strategy = &sysctl_jiffies,
2387 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2388 .procname = "mtu_expires",
2389 .data = &ip6_rt_mtu_expires,
2390 .maxlen = sizeof(int),
2391 .mode = 0644,
2392 .proc_handler = &proc_dointvec_jiffies,
2393 .strategy = &sysctl_jiffies,
2396 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2397 .procname = "min_adv_mss",
2398 .data = &ip6_rt_min_advmss,
2399 .maxlen = sizeof(int),
2400 .mode = 0644,
2401 .proc_handler = &proc_dointvec_jiffies,
2402 .strategy = &sysctl_jiffies,
2405 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2406 .procname = "gc_min_interval_ms",
2407 .data = &ip6_rt_gc_min_interval,
2408 .maxlen = sizeof(int),
2409 .mode = 0644,
2410 .proc_handler = &proc_dointvec_ms_jiffies,
2411 .strategy = &sysctl_ms_jiffies,
2413 { .ctl_name = 0 }
2416 #endif
2418 void __init ip6_route_init(void)
2420 struct proc_dir_entry *p;
2422 ip6_dst_ops.kmem_cachep =
2423 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2424 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2425 fib6_init();
2426 #ifdef CONFIG_PROC_FS
2427 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2428 if (p)
2429 p->owner = THIS_MODULE;
2431 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2432 #endif
2433 #ifdef CONFIG_XFRM
2434 xfrm6_init();
2435 #endif
2436 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2437 fib6_rules_init();
2438 #endif
2441 void ip6_route_cleanup(void)
2443 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2444 fib6_rules_cleanup();
2445 #endif
2446 #ifdef CONFIG_PROC_FS
2447 proc_net_remove("ipv6_route");
2448 proc_net_remove("rt6_stats");
2449 #endif
2450 #ifdef CONFIG_XFRM
2451 xfrm6_fini();
2452 #endif
2453 rt6_ifdown(NULL);
2454 fib6_gc_cleanup();
2455 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);