[PATCH] v850: call init_page_count() instead of set_page_count()
[linux-2.6/verdex.git] / net / ipv6 / route.c
blob87c39c978cd0e12dc44ecddd521d5a220e40ff9f
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 /* Changes:
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
41 #ifdef CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
57 #include <asm/uaccess.h>
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
/*
 * When 0, ip6_route_input()/ip6_route_output() do not call
 * rt6_alloc_clone() for routes that already have a nexthop (or are
 * NONEXTHOP); they hand back the selected route itself.
 */
74 #define CLONE_OFFLINK_ROUTE 0
/*
 * Flags for the "strict" argument of rt6_select()/rt6_score_route():
 *  - RT6_SELECT_F_IFACE: reject routes whose device does not match oif.
 *  - RT6_SELECT_F_REACHABLE: reject routes for which rt6_check_neigh()
 *    returns 0.
 */
76 #define RT6_SELECT_F_IFACE 0x1
77 #define RT6_SELECT_F_REACHABLE 0x2
/* ip6_dst_gc() reports "over limit" once the dst entry count exceeds this. */
79 static int ip6_rt_max_size = 4096;
/* Minimum spacing (jiffies) between GC runs while under ip6_rt_max_size. */
80 static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expire value to half of this (jiffies). */
81 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static and not used in this part of the file; presumably read by the fib6 GC code -- TODO confirm. */
82 int ip6_rt_gc_interval = 30*HZ;
/* Shift count: ip6_dst_gc() decays expire by expire >> elasticity per call. */
83 static int ip6_rt_gc_elasticity = 9;
/* Lifetime (jiffies) given to routes that record a learned path MTU. */
84 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(); the 20 and 40 are presumably the TCP and IPv6 header sizes -- TODO confirm. */
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/*
 * Forward declarations. Most of these are needed because ip6_dst_ops and
 * ip6_null_entry below take the addresses of functions that are defined
 * later in the file.
 */
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void ip6_dst_destroy(struct dst_entry *);
91 static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93 static int ip6_dst_gc(void);
/* Input/output handlers installed on reject routes and on ip6_null_entry. */
95 static int ip6_pkt_discard(struct sk_buff *skb);
96 static int ip6_pkt_discard_out(struct sk_buff *skb);
97 static void ip6_link_failure(struct sk_buff *skb);
98 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* Helpers used by rt6_route_rcv(); only built with CONFIG_IPV6_ROUTE_INFO. */
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106 #endif
/*
 * dst operations table for IPv6 routes. Every rt6_info is allocated
 * against this table (see ip6_dst_alloc()), and its entry counter is what
 * ip6_dst_gc() compares with gc_thresh and ip6_rt_max_size.
 */
108 static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
/* Each entry is a full rt6_info, not a bare dst_entry. */
119 .entry_size = sizeof(struct rt6_info),
/*
 * Sentinel "no route" entry. The lookup helpers in this file
 * (rt6_device_match(), rt6_select()) return its address when nothing
 * matches. It is a reject route on the loopback device whose handlers
 * discard packets and whose dst error is -ENETUNREACH.
 */
122 struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
/* The entry is its own path. */
134 .path = (struct dst_entry*)&ip6_null_entry,
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
/* Largest possible metric value. */
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Root node of the IPv6 routing table. Its leaf starts out as
 * ip6_null_entry; every fib6_lookup()/fib6_add()/fib6_locate() call in
 * this file is made against this root.
 */
142 struct fib6_node ip6_routing_table = {
143 .leaf = &ip6_null_entry,
144 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
147 /* Protects all the ip6 fib */
/* Taken for reading around lookups and for writing around fib6_add()/fib6_del(). */
149 DEFINE_RWLOCK(rt6_lock);
152 /* allocate dst with ip6_dst_ops */
153 static __inline__ struct rt6_info *ip6_dst_alloc(void)
155 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
158 static void ip6_dst_destroy(struct dst_entry *dst)
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
163 if (idev != NULL) {
164 rt->rt6i_idev = NULL;
165 in6_dev_put(idev);
169 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
170 int how)
172 struct rt6_info *rt = (struct rt6_info *)dst;
173 struct inet6_dev *idev = rt->rt6i_idev;
175 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
176 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
177 if (loopback_idev != NULL) {
178 rt->rt6i_idev = loopback_idev;
179 in6_dev_put(idev);
184 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186 return (rt->rt6i_flags & RTF_EXPIRES &&
187 time_after(jiffies, rt->rt6i_expires));
191 * Route lookup. Any rt6_lock is implied.
194 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
195 int oif,
196 int strict)
198 struct rt6_info *local = NULL;
199 struct rt6_info *sprt;
201 if (oif) {
202 for (sprt = rt; sprt; sprt = sprt->u.next) {
203 struct net_device *dev = sprt->rt6i_dev;
204 if (dev->ifindex == oif)
205 return sprt;
206 if (dev->flags & IFF_LOOPBACK) {
207 if (sprt->rt6i_idev == NULL ||
208 sprt->rt6i_idev->dev->ifindex != oif) {
209 if (strict && oif)
210 continue;
211 if (local && (!oif ||
212 local->rt6i_idev->dev->ifindex == oif))
213 continue;
215 local = sprt;
219 if (local)
220 return local;
222 if (strict)
223 return &ip6_null_entry;
225 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * The original author notes this may not be entirely appropriate yet and
 * that probes MUST be rate-limited (to no more than one per minute); the
 * limit applied here is the interface's cnf.rtr_probe_interval, measured
 * from neigh->updated.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked pre-check; re-done under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;	/* restart the rate-limit window */
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* The solicitation is sent after the neighbour lock has been dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
265 * Default Router Selection (RFC 2461 6.3.6)
267 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
269 struct net_device *dev = rt->rt6i_dev;
270 if (!oif || dev->ifindex == oif)
271 return 2;
272 if ((dev->flags & IFF_LOOPBACK) &&
273 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
274 return 1;
275 return 0;
278 static int inline rt6_check_neigh(struct rt6_info *rt)
280 struct neighbour *neigh = rt->rt6i_nexthop;
281 int m = 0;
282 if (rt->rt6i_flags & RTF_NONEXTHOP ||
283 !(rt->rt6i_flags & RTF_GATEWAY))
284 m = 1;
285 else if (neigh) {
286 read_lock_bh(&neigh->lock);
287 if (neigh->nud_state & NUD_VALID)
288 m = 2;
289 read_unlock_bh(&neigh->lock);
291 return m;
294 static int rt6_score_route(struct rt6_info *rt, int oif,
295 int strict)
297 int m, n;
299 m = rt6_check_dev(rt, oif);
300 if (!m && (strict & RT6_SELECT_F_IFACE))
301 return -1;
302 #ifdef CONFIG_IPV6_ROUTER_PREF
303 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
304 #endif
305 n = rt6_check_neigh(rt);
306 if (n > 1)
307 m |= 16;
308 else if (!n && strict & RT6_SELECT_F_REACHABLE)
309 return -1;
310 return m;
/*
 * Default router selection over the route list at *head.
 *
 * Only the leading run of entries sharing the first entry's metric is
 * considered. Each non-expired entry is scored with rt6_score_route() and
 * the highest non-negative score wins. Returns the winner, or
 * &ip6_null_entry when nothing qualified.
 *
 * Side effects:
 *  - rt6_probe() is called on every candidate that is not (or is no
 *    longer) the best match.
 *  - If nothing matched and RT6_SELECT_F_REACHABLE was requested, the first
 *    entry is unlinked and re-inserted after the last non-expired entry
 *    examined, and *head is advanced (round-robin).
 *
 * NOTE(review): *head is dereferenced unconditionally when rt0 is
 * initialised, so the "head ? *head : NULL" in the trace call cannot
 * protect against a NULL head.
 */
313 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
314 int strict)
316 struct rt6_info *match = NULL, *last = NULL;
317 struct rt6_info *rt, *rt0 = *head;
318 u32 metric;
/* Best score so far; -1 means no candidate has been accepted yet. */
319 int mpri = -1;
321 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
322 __FUNCTION__, head, head ? *head : NULL, oif);
/*
 * Stop at the end of the list, at the first entry with a different
 * metric, or if the walk comes back to rt0 after at least one
 * non-expired entry has been seen.
 */
324 for (rt = rt0, metric = rt0->rt6i_metric;
325 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
326 rt = rt->u.next) {
327 int m;
329 if (rt6_check_expired(rt))
330 continue;
/* Remember the last non-expired entry for the rotation below. */
332 last = rt;
334 m = rt6_score_route(rt, oif, strict);
335 if (m < 0)
336 continue;
/* New best: probe the previous best (rt6_probe() tolerates NULL). */
338 if (m > mpri) {
339 rt6_probe(match);
340 match = rt;
341 mpri = m;
342 } else {
343 rt6_probe(rt);
347 if (!match &&
348 (strict & RT6_SELECT_F_REACHABLE) &&
349 last && last != rt0) {
350 /* no entries matched; do round-robin */
/* The function-local spinlock serialises the three pointer updates. */
351 static DEFINE_SPINLOCK(lock);
352 spin_lock(&lock);
353 *head = rt0->u.next;
354 rt0->u.next = last->u.next;
355 last->u.next = rt0;
356 spin_unlock(&lock);
359 RT6_TRACE("%s() => %p, score=%d\n",
360 __FUNCTION__, match, mpri);
362 return (match ? match : &ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information Option taken from a Router Advertisement.
 *
 * @dev:    interface the option arrived on (its ifindex keys the route)
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * The option is rejected with -EINVAL when it is shorter than
 * struct route_info, when its length field (units of 8 octets) is larger
 * than 3 or too small for the advertised prefix length, or when it
 * carries the reserved route preference value. RFC 4191 section 2.3
 * requires an option with the reserved preference to be ignored rather
 * than treated as medium preference.
 *
 * Otherwise the matching route-info route is looked up and then:
 *   - removed when the lifetime is 0;
 *   - created when it does not exist and the lifetime is non-zero;
 *   - updated with the new preference when it already exists.
 * A lifetime of 0xffffffff means "infinite" (RTF_EXPIRES is cleared); any
 * other value sets rt6i_expires to jiffies + HZ * lifetime.
 *
 * Returns 0 on success (including a failed allocation of a new route) or
 * -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* Reserved preference value: the whole option must be ignored. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* The lifetime arrives in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * The option carries fewer than 16 prefix bytes; build a
		 * full address from the first prefix_len bits.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		/* ip6_del_rt() also drops the reference held on rt. */
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
443 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
444 int oif, int strict)
446 struct fib6_node *fn;
447 struct rt6_info *rt;
449 read_lock_bh(&rt6_lock);
450 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
451 rt = rt6_device_match(fn->leaf, oif, strict);
452 dst_hold(&rt->u.dst);
453 rt->u.dst.__use++;
454 read_unlock_bh(&rt6_lock);
456 rt->u.dst.lastuse = jiffies;
457 if (rt->u.dst.error == 0)
458 return rt;
459 dst_release(&rt->u.dst);
460 return NULL;
463 /* ip6_ins_rt is called with FREE rt6_lock.
464 It takes new route entry, the addition fails by any reason the
465 route is freed. In any case, if caller does not hold it, it may
466 be destroyed.
469 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
470 void *_rtattr, struct netlink_skb_parms *req)
472 int err;
474 write_lock_bh(&rt6_lock);
475 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
476 write_unlock_bh(&rt6_lock);
478 return err;
481 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
482 struct in6_addr *saddr)
484 struct rt6_info *rt;
487 * Clone the route.
490 rt = ip6_rt_copy(ort);
492 if (rt) {
493 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
494 if (rt->rt6i_dst.plen != 128 &&
495 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
496 rt->rt6i_flags |= RTF_ANYCAST;
497 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
500 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
501 rt->rt6i_dst.plen = 128;
502 rt->rt6i_flags |= RTF_CACHE;
503 rt->u.dst.flags |= DST_HOST;
505 #ifdef CONFIG_IPV6_SUBTREES
506 if (rt->rt6i_src.plen && saddr) {
507 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
508 rt->rt6i_src.plen = 128;
510 #endif
512 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
516 return rt;
519 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
521 struct rt6_info *rt = ip6_rt_copy(ort);
522 if (rt) {
523 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
524 rt->rt6i_dst.plen = 128;
525 rt->rt6i_flags |= RTF_CACHE;
526 if (rt->rt6i_flags & RTF_REJECT)
527 rt->u.dst.error = ort->u.dst.error;
528 rt->u.dst.flags |= DST_HOST;
529 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
531 return rt;
/*
 * Used by ip6_route_input()/ip6_route_output() right after rt6_select().
 * It relies on the caller's locals "rt" and "fn" and on the caller's
 * labels "out" and "restart".
 *
 * If the selection produced ip6_null_entry, climb towards the root via
 * fn->parent: jump to "out" on reaching a RTN_ROOT node, or to "restart"
 * on the first ancestor that carries routes (RTN_RTINFO) so the selection
 * is retried there. Falls through when rt is a real route or when the
 * parent chain ends.
 */
534 #define BACKTRACK() \
535 if (rt == &ip6_null_entry) { \
536 while ((fn = fn->parent) != NULL) { \
537 if (fn->fn_flags & RTN_ROOT) { \
538 goto out; \
540 if (fn->fn_flags & RTN_RTINFO) \
541 goto restart; \
/*
 * Route an incoming packet: find (or create) the route for the packet's
 * destination/source addresses and store it, with a reference held, in
 * skb->dst. When nothing matches, skb->dst is set to ip6_null_entry.
 *
 * Flow:
 *  1. Under the rt6_lock read lock, look up the fib node and select a route
 *     with rt6_select(), first requiring a reachable router. BACKTRACK()
 *     climbs to parent nodes when the node yields nothing.
 *  2. ip6_null_entry and RTF_CACHE entries go to "out". Any arrival at
 *     "out" while "reachable" is still set clears it and repeats the
 *     lookup once without the reachability requirement.
 *  3. A route without a nexthop (and not NONEXTHOP) is copied into a host
 *     cache entry by rt6_alloc_cow() and inserted with ip6_ins_rt(). Since
 *     the lock is dropped for that, a failed insert leads to a relookup,
 *     at most "attempts" times.
 *  4. Other routes are used as they are while CLONE_OFFLINK_ROUTE is 0.
 */
546 void ip6_route_input(struct sk_buff *skb)
548 struct fib6_node *fn;
549 struct rt6_info *rt, *nrt;
550 int strict;
/* Upper bound on relookups after a failed cache-entry insert. */
551 int attempts = 3;
552 int err;
553 int reachable = RT6_SELECT_F_REACHABLE;
/* Multicast and link-local destinations must match the incoming interface. */
555 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
557 relookup:
558 read_lock_bh(&rt6_lock);
560 restart_2:
561 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
562 &skb->nh.ipv6h->saddr);
564 restart:
565 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
566 BACKTRACK();
567 if (rt == &ip6_null_entry ||
568 rt->rt6i_flags & RTF_CACHE)
569 goto out;
/* Pin the selected route, then drop the table lock for the allocation. */
571 dst_hold(&rt->u.dst);
572 read_unlock_bh(&rt6_lock);
574 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
575 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
576 else {
577 #if CLONE_OFFLINK_ROUTE
578 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
579 #else
/* Use the selected route itself; the reference taken above is kept. */
580 goto out2;
581 #endif
/* Swap the reference from the original route to the new entry (or the null entry). */
584 dst_release(&rt->u.dst);
585 rt = nrt ? : &ip6_null_entry;
587 dst_hold(&rt->u.dst);
588 if (nrt) {
589 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
590 if (!err)
591 goto out2;
/* Out of retries: return whatever rt currently is. */
594 if (--attempts <= 0)
595 goto out2;
598 * Race condition! In the gap, when rt6_lock was
599 * released someone could insert this route. Relookup.
601 dst_release(&rt->u.dst);
602 goto relookup;
604 out:
/* Second pass without the reachability requirement. */
605 if (reachable) {
606 reachable = 0;
607 goto restart_2;
609 dst_hold(&rt->u.dst);
610 read_unlock_bh(&rt6_lock);
611 out2:
/* rt is held here on every path; the reference is handed to skb->dst. */
612 rt->u.dst.lastuse = jiffies;
613 rt->u.dst.__use++;
614 skb->dst = (struct dst_entry *) rt;
615 return;
/*
 * Route an outgoing flow: find (or create) the route for fl->fl6_dst /
 * fl->fl6_src on interface fl->oif and return its dst entry with a
 * reference held. Never returns NULL; when nothing matches, the dst of
 * ip6_null_entry is returned. @sk is not used in this function.
 *
 * The control flow mirrors ip6_route_input():
 *  1. Select under the rt6_lock read lock, first requiring a reachable
 *     router, with BACKTRACK() climbing to parent nodes.
 *  2. Any arrival at "out" while "reachable" is still set repeats the
 *     lookup once without that requirement.
 *  3. Routes without a nexthop are copied into a host cache entry and
 *     inserted; a failed insert triggers a relookup, at most "attempts"
 *     times.
 */
618 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
620 struct fib6_node *fn;
621 struct rt6_info *rt, *nrt;
622 int strict;
/* Upper bound on relookups after a failed cache-entry insert. */
623 int attempts = 3;
624 int err;
625 int reachable = RT6_SELECT_F_REACHABLE;
/* Multicast and link-local destinations must match the requested interface. */
627 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
629 relookup:
630 read_lock_bh(&rt6_lock);
632 restart_2:
633 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
635 restart:
636 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
637 BACKTRACK();
638 if (rt == &ip6_null_entry ||
639 rt->rt6i_flags & RTF_CACHE)
640 goto out;
/* Pin the selected route, then drop the table lock for the allocation. */
642 dst_hold(&rt->u.dst);
643 read_unlock_bh(&rt6_lock);
645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
646 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
647 else {
648 #if CLONE_OFFLINK_ROUTE
649 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
650 #else
/* Use the selected route itself; the reference taken above is kept. */
651 goto out2;
652 #endif
/* Swap the reference from the original route to the new entry (or the null entry). */
655 dst_release(&rt->u.dst);
656 rt = nrt ? : &ip6_null_entry;
658 dst_hold(&rt->u.dst);
659 if (nrt) {
660 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
661 if (!err)
662 goto out2;
/* Out of retries: return whatever rt currently is. */
665 if (--attempts <= 0)
666 goto out2;
669 * Race condition! In the gap, when rt6_lock was
670 * released someone could insert this route. Relookup.
672 dst_release(&rt->u.dst);
673 goto relookup;
675 out:
/* Second pass without the reachability requirement. */
676 if (reachable) {
677 reachable = 0;
678 goto restart_2;
680 dst_hold(&rt->u.dst);
681 read_unlock_bh(&rt6_lock);
682 out2:
/* rt is held here on every path; the reference goes to the caller. */
683 rt->u.dst.lastuse = jiffies;
684 rt->u.dst.__use++;
685 return &rt->u.dst;
690 * Destination cache support functions
693 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
695 struct rt6_info *rt;
697 rt = (struct rt6_info *) dst;
699 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
700 return dst;
702 return NULL;
705 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
707 struct rt6_info *rt = (struct rt6_info *) dst;
709 if (rt) {
710 if (rt->rt6i_flags & RTF_CACHE)
711 ip6_del_rt(rt, NULL, NULL, NULL);
712 else
713 dst_release(dst);
715 return NULL;
718 static void ip6_link_failure(struct sk_buff *skb)
720 struct rt6_info *rt;
722 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
724 rt = (struct rt6_info *) skb->dst;
725 if (rt) {
726 if (rt->rt6i_flags&RTF_CACHE) {
727 dst_set_expires(&rt->u.dst, 0);
728 rt->rt6i_flags |= RTF_EXPIRES;
729 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
730 rt->rt6i_node->fn_sernum = -1;
734 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
736 struct rt6_info *rt6 = (struct rt6_info*)dst;
738 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
739 rt6->rt6i_flags |= RTF_MODIFIED;
740 if (mtu < IPV6_MIN_MTU) {
741 mtu = IPV6_MIN_MTU;
742 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
744 dst->metrics[RTAX_MTU-1] = mtu;
748 /* Protected by rt6_lock. */
/* Singly linked list (via dst->next) of entries created by ndisc_dst_alloc(); reaped by ndisc_dst_gc(). */
749 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration: ndisc_dst_alloc() below uses it before its definition. */
750 static int ipv6_get_mtu(struct net_device *dev);
752 static inline unsigned int ipv6_advmss(unsigned int mtu)
754 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
756 if (mtu < ip6_rt_min_advmss)
757 mtu = ip6_rt_min_advmss;
760 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
761 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
762 * IPV6_MAXPLEN is also valid and means: "any MSS,
763 * rely only on pmtu discovery"
765 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
766 mtu = IPV6_MAXPLEN;
767 return mtu;
/*
 * Allocate a standalone dst entry for neighbour discovery traffic on @dev.
 *
 * @dev:    outgoing device; a reference is taken on it and on its inet6_dev
 * @neigh:  neighbour to attach; a reference is taken when non-NULL,
 *          otherwise one is obtained with ndisc_get_neigh(@dev, @addr)
 * @addr:   address used for that neighbour lookup
 * @output: output handler stored in the dst
 *
 * The entry is not inserted into the routing table. It starts with a
 * reference count of 1 and is linked onto ndisc_dst_gc_list, where
 * ndisc_dst_gc() frees it once the count has dropped to zero.
 *
 * Returns the new dst, or NULL when @dev has no inet6_dev or the
 * allocation fails.
 */
770 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
771 struct neighbour *neigh,
772 struct in6_addr *addr,
773 int (*output)(struct sk_buff *))
775 struct rt6_info *rt;
776 struct inet6_dev *idev = in6_dev_get(dev);
778 if (unlikely(idev == NULL))
779 return NULL;
781 rt = ip6_dst_alloc();
/* Allocation failed: give the idev reference back; rt is NULL at "out". */
782 if (unlikely(rt == NULL)) {
783 in6_dev_put(idev);
784 goto out;
787 dev_hold(dev);
788 if (neigh)
789 neigh_hold(neigh);
790 else
791 neigh = ndisc_get_neigh(dev, addr);
793 rt->rt6i_dev = dev;
794 rt->rt6i_idev = idev;
795 rt->rt6i_nexthop = neigh;
796 atomic_set(&rt->u.dst.__refcnt, 1);
797 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
798 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
799 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
800 rt->u.dst.output = output;
802 #if 0 /* there's no chance to use these for ndisc */
803 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
804 ? DST_HOST
805 : 0;
806 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
807 rt->rt6i_dst.plen = 128;
808 #endif
/* Push onto the head of the GC list under the fib write lock. */
810 write_lock_bh(&rt6_lock);
811 rt->u.dst.next = ndisc_dst_gc_list;
812 ndisc_dst_gc_list = &rt->u.dst;
813 write_unlock_bh(&rt6_lock);
815 fib6_force_start_gc();
817 out:
818 return (struct dst_entry *)rt;
821 int ndisc_dst_gc(int *more)
823 struct dst_entry *dst, *next, **pprev;
824 int freed;
826 next = NULL;
827 pprev = &ndisc_dst_gc_list;
828 freed = 0;
829 while ((dst = *pprev) != NULL) {
830 if (!atomic_read(&dst->__refcnt)) {
831 *pprev = dst->next;
832 dst_free(dst);
833 freed++;
834 } else {
835 pprev = &dst->next;
836 (*more)++;
840 return freed;
843 static int ip6_dst_gc(void)
845 static unsigned expire = 30*HZ;
846 static unsigned long last_gc;
847 unsigned long now = jiffies;
849 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
850 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
851 goto out;
853 expire++;
854 fib6_run_gc(expire);
855 last_gc = now;
856 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
857 expire = ip6_rt_gc_timeout>>1;
859 out:
860 expire -= expire>>ip6_rt_gc_elasticity;
861 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
864 /* Clean host part of a prefix. Not necessary in radix tree,
865 but results in cleaner routing tables.
867 Remove it only when all the things will work!
870 static int ipv6_get_mtu(struct net_device *dev)
872 int mtu = IPV6_MIN_MTU;
873 struct inet6_dev *idev;
875 idev = in6_dev_get(dev);
876 if (idev) {
877 mtu = idev->cnf.mtu6;
878 in6_dev_put(idev);
880 return mtu;
883 int ipv6_get_hoplimit(struct net_device *dev)
885 int hoplimit = ipv6_devconf.hop_limit;
886 struct inet6_dev *idev;
888 idev = in6_dev_get(dev);
889 if (idev) {
890 hoplimit = idev->cnf.hop_limit;
891 in6_dev_put(idev);
893 return hoplimit;
/*
 * Create a route from @rtmsg and insert it into the routing table.
 *
 * @rtmsg:   route description (destination/source prefix, gateway, flags,
 *           metric, interface index, lifetime in rtmsg_info)
 * @nlh:     originating netlink header, or NULL; supplies rtm_protocol
 * @_rtattr: netlink attribute array, or NULL; RTA_METRICS entries are
 *           copied into the dst metrics
 * @req:     passed through to ip6_ins_rt()
 *
 * Steps, in order:
 *  1. Validate prefix lengths; source prefixes are refused without
 *     CONFIG_IPV6_SUBTREES.
 *  2. If an interface index is given, take references on the device and
 *     its inet6_dev (-ENODEV when either is missing).
 *  3. Allocate the rt6_info and fill in protocol, handlers, keys and metric.
 *     A zero rtmsg_metric is rewritten to IP6_RT_PRIO_USER in @rtmsg itself.
 *  4. RTF_REJECT requests, and routes through a loopback device to a
 *     non-loopback destination, are turned into reject routes bound to
 *     loopback_dev, and control jumps to install_route.
 *  5. For RTF_GATEWAY routes whose gateway is not a link-local unicast
 *     address, the gateway must be unicast and reachable through an
 *     existing non-gateway route on the same device (found with
 *     rt6_lookup()); if no device was given it is inherited from there.
 *     Gateway routes through a loopback device are refused.
 *  6. A neighbour entry is looked up for gateway/NONEXTHOP routes.
 *  7. install_route: copy RTA_METRICS, set a hop limit metric of 0 to -1,
 *     default the MTU and ADVMSS metrics, attach dev/idev to the route and
 *     insert it with ip6_ins_rt().
 *
 * On success the device and inet6_dev references are owned by the route.
 * On failure ("out") they are dropped here and the route is freed.
 *
 * Returns the ip6_ins_rt() result on the success path, or a negative errno
 * (-EINVAL, -ENODEV, -ENOMEM, -EHOSTUNREACH, or a neighbour lookup error).
 */
900 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
901 void *_rtattr, struct netlink_skb_parms *req)
903 int err;
904 struct rtmsg *r;
905 struct rtattr **rta;
906 struct rt6_info *rt = NULL;
907 struct net_device *dev = NULL;
908 struct inet6_dev *idev = NULL;
909 int addr_type;
911 rta = (struct rtattr **) _rtattr;
913 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
914 return -EINVAL;
915 #ifndef CONFIG_IPV6_SUBTREES
916 if (rtmsg->rtmsg_src_len)
917 return -EINVAL;
918 #endif
/* err is preset so that both failure gotos below report -ENODEV. */
919 if (rtmsg->rtmsg_ifindex) {
920 err = -ENODEV;
921 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
922 if (!dev)
923 goto out;
924 idev = in6_dev_get(dev);
925 if (!idev)
926 goto out;
929 if (rtmsg->rtmsg_metric == 0)
930 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
932 rt = ip6_dst_alloc();
934 if (rt == NULL) {
935 err = -ENOMEM;
936 goto out;
939 rt->u.dst.obsolete = -1;
/* rt6i_expires is always filled in; RTF_EXPIRES in the flags decides whether it is honoured. */
940 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
941 if (nlh && (r = NLMSG_DATA(nlh))) {
942 rt->rt6i_protocol = r->rtm_protocol;
943 } else {
944 rt->rt6i_protocol = RTPROT_BOOT;
947 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
949 if (addr_type & IPV6_ADDR_MULTICAST)
950 rt->u.dst.input = ip6_mc_input;
951 else
952 rt->u.dst.input = ip6_forward;
954 rt->u.dst.output = ip6_output;
/* Store only the first rtmsg_dst_len bits of the destination. */
956 ipv6_addr_prefix(&rt->rt6i_dst.addr,
957 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
958 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
959 if (rt->rt6i_dst.plen == 128)
960 rt->u.dst.flags = DST_HOST;
962 #ifdef CONFIG_IPV6_SUBTREES
963 ipv6_addr_prefix(&rt->rt6i_src.addr,
964 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
965 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
966 #endif
968 rt->rt6i_metric = rtmsg->rtmsg_metric;
970 /* We cannot add true routes via loopback here,
971 they would result in kernel looping; promote them to reject routes
973 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
974 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
975 /* hold loopback dev/idev if we haven't done so. */
976 if (dev != &loopback_dev) {
977 if (dev) {
978 dev_put(dev);
979 in6_dev_put(idev);
981 dev = &loopback_dev;
982 dev_hold(dev);
983 idev = in6_dev_get(dev);
984 if (!idev) {
985 err = -ENODEV;
986 goto out;
/* Reject routes discard traffic in both directions and report -ENETUNREACH. */
989 rt->u.dst.output = ip6_pkt_discard_out;
990 rt->u.dst.input = ip6_pkt_discard;
991 rt->u.dst.error = -ENETUNREACH;
992 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
993 goto install_route;
996 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
997 struct in6_addr *gw_addr;
998 int gwa_type;
1000 gw_addr = &rtmsg->rtmsg_gateway;
1001 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1002 gwa_type = ipv6_addr_type(gw_addr);
1004 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1005 struct rt6_info *grt;
1007 /* IPv6 strictly inhibits using not link-local
1008 addresses as nexthop address.
1009 Otherwise, router will not able to send redirects.
1010 It is very good, but in some (rare!) circumstances
1011 (SIT, PtP, NBMA NOARP links) it is handy to allow
1012 some exceptions. --ANK
1014 err = -EINVAL;
1015 if (!(gwa_type&IPV6_ADDR_UNICAST))
1016 goto out;
1018 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1020 err = -EHOSTUNREACH;
1021 if (grt == NULL)
1022 goto out;
1023 if (dev) {
1024 if (dev != grt->rt6i_dev) {
1025 dst_release(&grt->u.dst);
1026 goto out;
1028 } else {
1029 dev = grt->rt6i_dev;
1030 idev = grt->rt6i_idev;
1031 dev_hold(dev);
1032 in6_dev_hold(grt->rt6i_idev);
1034 if (!(grt->rt6i_flags&RTF_GATEWAY))
1035 err = 0;
1036 dst_release(&grt->u.dst);
1038 if (err)
1039 goto out;
1041 err = -EINVAL;
1042 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1043 goto out;
1046 err = -ENODEV;
1047 if (dev == NULL)
1048 goto out;
1050 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1051 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1052 if (IS_ERR(rt->rt6i_nexthop)) {
1053 err = PTR_ERR(rt->rt6i_nexthop);
1054 rt->rt6i_nexthop = NULL;
1055 goto out;
1059 rt->rt6i_flags = rtmsg->rtmsg_flags;
1061 install_route:
1062 if (rta && rta[RTA_METRICS-1]) {
1063 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1064 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1066 while (RTA_OK(attr, attrlen)) {
1067 unsigned flavor = attr->rta_type;
1068 if (flavor) {
1069 if (flavor > RTAX_MAX) {
1070 err = -EINVAL;
1071 goto out;
1073 rt->u.dst.metrics[flavor-1] =
1074 *(u32 *)RTA_DATA(attr);
1076 attr = RTA_NEXT(attr, attrlen);
1080 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1081 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1082 if (!rt->u.dst.metrics[RTAX_MTU-1])
1083 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1084 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1085 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1086 rt->u.dst.dev = dev;
1087 rt->rt6i_idev = idev;
1088 return ip6_ins_rt(rt, nlh, _rtattr, req);
1090 out:
1091 if (dev)
1092 dev_put(dev);
1093 if (idev)
1094 in6_dev_put(idev);
1095 if (rt)
1096 dst_free((struct dst_entry *) rt);
1097 return err;
1100 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1102 int err;
1104 write_lock_bh(&rt6_lock);
1106 err = fib6_del(rt, nlh, _rtattr, req);
1107 dst_release(&rt->u.dst);
1109 write_unlock_bh(&rt6_lock);
1111 return err;
1114 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1116 struct fib6_node *fn;
1117 struct rt6_info *rt;
1118 int err = -ESRCH;
1120 read_lock_bh(&rt6_lock);
1122 fn = fib6_locate(&ip6_routing_table,
1123 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1124 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1126 if (fn) {
1127 for (rt = fn->leaf; rt; rt = rt->u.next) {
1128 if (rtmsg->rtmsg_ifindex &&
1129 (rt->rt6i_dev == NULL ||
1130 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1131 continue;
1132 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1133 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1134 continue;
1135 if (rtmsg->rtmsg_metric &&
1136 rtmsg->rtmsg_metric != rt->rt6i_metric)
1137 continue;
1138 dst_hold(&rt->u.dst);
1139 read_unlock_bh(&rt6_lock);
1141 return ip6_del_rt(rt, nlh, _rtattr, req);
1144 read_unlock_bh(&rt6_lock);
1146 return err;
1150 * Handle redirects
/*
 * Handle a received redirect for destination @dest.
 *
 * @dest:    destination the redirect applies to
 * @saddr:   source address of the redirect; must be the gateway of a
 *           current route towards @dest
 * @neigh:   neighbour entry of the new first hop
 * @lladdr:  link-layer address passed on to neigh_update()
 * @on_link: non-zero when the new entry must not carry RTF_GATEWAY
 *
 * The redirect is accepted only if some non-expired gateway route for
 * @dest on neigh->dev has @saddr as its gateway; otherwise a rate-limited
 * debug message is printed and nothing changes. On acceptance the
 * neighbour is updated, the old route is confirmed, and a host (/128)
 * cache route through @neigh is inserted. If the route that matched was
 * itself a cache entry, it is deleted.
 */
1152 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1153 struct neighbour *neigh, u8 *lladdr, int on_link)
1155 struct rt6_info *rt, *nrt = NULL;
1156 int strict;
1157 struct fib6_node *fn;
1160 * Get the "current" route for this destination and
1161 * check if the redirect has come from approriate router.
1163 * RFC 2461 specifies that redirects should only be
1164 * accepted if they come from the nexthop to the target.
1165 * Due to the way the routes are chosen, this notion
1166 * is a bit fuzzy and one might need to check all possible
1167 * routes.
/* Non-zero for multicast/link-local destinations; enables the parent-node search below. */
1169 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
1171 read_lock_bh(&rt6_lock);
1172 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1173 restart:
/* Find a live gateway route on the same device whose gateway sent the redirect. */
1174 for (rt = fn->leaf; rt; rt = rt->u.next) {
1176 * Current route is on-link; redirect is always invalid.
1178 * Seems, previous statement is not true. It could
1179 * be node, which looks for us as on-link (f.e. proxy ndisc)
1180 * But then router serving it might decide, that we should
1181 * know truth 8)8) --ANK (980726).
1183 if (rt6_check_expired(rt))
1184 continue;
1185 if (!(rt->rt6i_flags & RTF_GATEWAY))
1186 continue;
1187 if (neigh->dev != rt->rt6i_dev)
1188 continue;
1189 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1190 continue;
1191 break;
/* Pin the match; otherwise, in strict mode, retry on the nearest ancestor node that carries routes. */
1193 if (rt)
1194 dst_hold(&rt->u.dst);
1195 else if (strict) {
1196 while ((fn = fn->parent) != NULL) {
1197 if (fn->fn_flags & RTN_ROOT)
1198 break;
1199 if (fn->fn_flags & RTN_RTINFO)
1200 goto restart;
1203 read_unlock_bh(&rt6_lock);
1205 if (!rt) {
1206 if (net_ratelimit())
1207 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1208 "for redirect target\n");
1209 return;
1213 * We have finally decided to accept it.
/* The router flags are only forced when the target is not on-link. */
1216 neigh_update(neigh, lladdr, NUD_STALE,
1217 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1218 NEIGH_UPDATE_F_OVERRIDE|
1219 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1220 NEIGH_UPDATE_F_ISROUTER))
1224 * Redirect received -> path was valid.
1225 * Look, redirects are sent only in response to data packets,
1226 * so that this nexthop apparently is reachable. --ANK
1228 dst_confirm(&rt->u.dst);
1230 /* Duplicate redirect: silently ignore. */
1231 if (neigh == rt->u.dst.neighbour)
1232 goto out;
1234 nrt = ip6_rt_copy(rt);
1235 if (nrt == NULL)
1236 goto out;
/* The copy's flags are replaced wholesale rather than inherited. */
1238 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1239 if (on_link)
1240 nrt->rt6i_flags &= ~RTF_GATEWAY;
1242 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1243 nrt->rt6i_dst.plen = 128;
1244 nrt->u.dst.flags |= DST_HOST;
1246 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1247 nrt->rt6i_nexthop = neigh_clone(neigh);
1248 /* Reset pmtu, it may be better */
1249 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1250 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1252 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1253 goto out;
/* ip6_del_rt() consumes the reference taken above, hence the direct return. */
1255 if (rt->rt6i_flags&RTF_CACHE) {
1256 ip6_del_rt(rt, NULL, NULL, NULL);
1257 return;
1260 out:
1261 dst_release(&rt->u.dst);
1262 return;
1266 * Handle ICMP "packet too big" messages
1267 * i.e. Path MTU discovery
1270 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1271 struct net_device *dev, u32 pmtu)
1273 struct rt6_info *rt, *nrt;
1274 int allfrag = 0;
1276 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1277 if (rt == NULL)
1278 return;
1280 if (pmtu >= dst_mtu(&rt->u.dst))
1281 goto out;
1283 if (pmtu < IPV6_MIN_MTU) {
1285 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1286 * MTU (1280) and a fragment header should always be included
1287 * after a node receiving Too Big message reporting PMTU is
1288 * less than the IPv6 Minimum Link MTU.
1290 pmtu = IPV6_MIN_MTU;
1291 allfrag = 1;
1294 /* New mtu received -> path was valid.
1295 They are sent only in response to data packets,
1296 so that this nexthop apparently is reachable. --ANK
1298 dst_confirm(&rt->u.dst);
1300 /* Host route. If it is static, it would be better
1301 not to override it, but add new one, so that
1302 when cache entry will expire old pmtu
1303 would return automatically.
1305 if (rt->rt6i_flags & RTF_CACHE) {
1306 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1307 if (allfrag)
1308 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1309 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1310 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1311 goto out;
1314 /* Network route.
1315 Two cases are possible:
1316 1. It is connected route. Action: COW
1317 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1319 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1320 nrt = rt6_alloc_cow(rt, daddr, saddr);
1321 else
1322 nrt = rt6_alloc_clone(rt, daddr);
1324 if (nrt) {
1325 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1326 if (allfrag)
1327 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1329 /* According to RFC 1981, detecting PMTU increase shouldn't be
1330 * happened within 5 mins, the recommended timer is 10 mins.
1331 * Here this route expiration time is set to ip6_rt_mtu_expires
1332 * which is 10 mins. After 10 mins the decreased pmtu is expired
1333 * and detecting PMTU increase will be automatically happened.
1335 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1336 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1338 ip6_ins_rt(nrt, NULL, NULL, NULL);
1340 out:
1341 dst_release(&rt->u.dst);
1345 * Misc support functions
1348 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1350 struct rt6_info *rt = ip6_dst_alloc();
1352 if (rt) {
1353 rt->u.dst.input = ort->u.dst.input;
1354 rt->u.dst.output = ort->u.dst.output;
1356 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1357 rt->u.dst.dev = ort->u.dst.dev;
1358 if (rt->u.dst.dev)
1359 dev_hold(rt->u.dst.dev);
1360 rt->rt6i_idev = ort->rt6i_idev;
1361 if (rt->rt6i_idev)
1362 in6_dev_hold(rt->rt6i_idev);
1363 rt->u.dst.lastuse = jiffies;
1364 rt->rt6i_expires = 0;
1366 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1367 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1368 rt->rt6i_metric = 0;
1370 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1371 #ifdef CONFIG_IPV6_SUBTREES
1372 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1373 #endif
1375 return rt;
1378 #ifdef CONFIG_IPV6_ROUTE_INFO
1379 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1380 struct in6_addr *gwaddr, int ifindex)
1382 struct fib6_node *fn;
1383 struct rt6_info *rt = NULL;
1385 write_lock_bh(&rt6_lock);
1386 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1387 if (!fn)
1388 goto out;
1390 for (rt = fn->leaf; rt; rt = rt->u.next) {
1391 if (rt->rt6i_dev->ifindex != ifindex)
1392 continue;
1393 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1394 continue;
1395 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1396 continue;
1397 dst_hold(&rt->u.dst);
1398 break;
1400 out:
1401 write_unlock_bh(&rt6_lock);
1402 return rt;
1405 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1406 struct in6_addr *gwaddr, int ifindex,
1407 unsigned pref)
1409 struct in6_rtmsg rtmsg;
1411 memset(&rtmsg, 0, sizeof(rtmsg));
1412 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1413 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1414 rtmsg.rtmsg_dst_len = prefixlen;
1415 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1416 rtmsg.rtmsg_metric = 1024;
1417 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1418 /* We should treat it as a default route if prefix length is 0. */
1419 if (!prefixlen)
1420 rtmsg.rtmsg_flags |= RTF_DEFAULT;
1421 rtmsg.rtmsg_ifindex = ifindex;
1423 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1425 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1427 #endif
1429 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1431 struct rt6_info *rt;
1432 struct fib6_node *fn;
1434 fn = &ip6_routing_table;
1436 write_lock_bh(&rt6_lock);
1437 for (rt = fn->leaf; rt; rt=rt->u.next) {
1438 if (dev == rt->rt6i_dev &&
1439 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1440 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1441 break;
1443 if (rt)
1444 dst_hold(&rt->u.dst);
1445 write_unlock_bh(&rt6_lock);
1446 return rt;
1449 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1450 struct net_device *dev,
1451 unsigned int pref)
1453 struct in6_rtmsg rtmsg;
1455 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1456 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1457 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1458 rtmsg.rtmsg_metric = 1024;
1459 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1460 RTF_PREF(pref);
1462 rtmsg.rtmsg_ifindex = dev->ifindex;
1464 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1465 return rt6_get_dflt_router(gwaddr, dev);
1468 void rt6_purge_dflt_routers(void)
1470 struct rt6_info *rt;
1472 restart:
1473 read_lock_bh(&rt6_lock);
1474 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1475 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1476 dst_hold(&rt->u.dst);
1478 read_unlock_bh(&rt6_lock);
1480 ip6_del_rt(rt, NULL, NULL, NULL);
1482 goto restart;
1485 read_unlock_bh(&rt6_lock);
1488 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1490 struct in6_rtmsg rtmsg;
1491 int err;
1493 switch(cmd) {
1494 case SIOCADDRT: /* Add a route */
1495 case SIOCDELRT: /* Delete a route */
1496 if (!capable(CAP_NET_ADMIN))
1497 return -EPERM;
1498 err = copy_from_user(&rtmsg, arg,
1499 sizeof(struct in6_rtmsg));
1500 if (err)
1501 return -EFAULT;
1503 rtnl_lock();
1504 switch (cmd) {
1505 case SIOCADDRT:
1506 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1507 break;
1508 case SIOCDELRT:
1509 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1510 break;
1511 default:
1512 err = -EINVAL;
1514 rtnl_unlock();
1516 return err;
1519 return -EINVAL;
1523 * Drop the packet on the floor
1526 static int ip6_pkt_discard(struct sk_buff *skb)
1528 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1529 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1530 kfree_skb(skb);
1531 return 0;
1534 static int ip6_pkt_discard_out(struct sk_buff *skb)
1536 skb->dev = skb->dst->dev;
1537 return ip6_pkt_discard(skb);
1541 * Allocate a dst for local (unicast / anycast) address.
1544 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1545 const struct in6_addr *addr,
1546 int anycast)
1548 struct rt6_info *rt = ip6_dst_alloc();
1550 if (rt == NULL)
1551 return ERR_PTR(-ENOMEM);
1553 dev_hold(&loopback_dev);
1554 in6_dev_hold(idev);
1556 rt->u.dst.flags = DST_HOST;
1557 rt->u.dst.input = ip6_input;
1558 rt->u.dst.output = ip6_output;
1559 rt->rt6i_dev = &loopback_dev;
1560 rt->rt6i_idev = idev;
1561 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1562 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1563 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1564 rt->u.dst.obsolete = -1;
1566 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1567 if (anycast)
1568 rt->rt6i_flags |= RTF_ANYCAST;
1569 else
1570 rt->rt6i_flags |= RTF_LOCAL;
1571 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1572 if (rt->rt6i_nexthop == NULL) {
1573 dst_free((struct dst_entry *) rt);
1574 return ERR_PTR(-ENOMEM);
1577 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1578 rt->rt6i_dst.plen = 128;
1580 atomic_set(&rt->u.dst.__refcnt, 1);
1582 return rt;
1585 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1587 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1588 rt != &ip6_null_entry) {
1589 RT6_TRACE("deleted by ifdown %p\n", rt);
1590 return -1;
1592 return 0;
1595 void rt6_ifdown(struct net_device *dev)
1597 write_lock_bh(&rt6_lock);
1598 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1599 write_unlock_bh(&rt6_lock);
/* Argument bundle passed through fib6_clean_tree() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1608 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1610 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1611 struct inet6_dev *idev;
1613 /* In IPv6 pmtu discovery is not optional,
1614 so that RTAX_MTU lock cannot disable it.
1615 We still use this lock to block changes
1616 caused by addrconf/ndisc.
1619 idev = __in6_dev_get(arg->dev);
1620 if (idev == NULL)
1621 return 0;
1623 /* For administrative MTU increase, there is no way to discover
1624 IPv6 PMTU increase, so PMTU increase should be updated here.
1625 Since RFC 1981 doesn't include administrative MTU increase
1626 update PMTU increase is a MUST. (i.e. jumbo frame)
1629 If new MTU is less than route PMTU, this new MTU will be the
1630 lowest MTU in the path, update the route PMTU to reflect PMTU
1631 decreases; if new MTU is greater than route PMTU, and the
1632 old MTU is the lowest MTU in the path, update the route PMTU
1633 to reflect the increase. In this case if the other nodes' MTU
1634 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1635 PMTU discouvery.
1637 if (rt->rt6i_dev == arg->dev &&
1638 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1639 (dst_mtu(&rt->u.dst) > arg->mtu ||
1640 (dst_mtu(&rt->u.dst) < arg->mtu &&
1641 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1642 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1643 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1644 return 0;
1647 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1649 struct rt6_mtu_change_arg arg;
1651 arg.dev = dev;
1652 arg.mtu = mtu;
1653 read_lock_bh(&rt6_lock);
1654 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1655 read_unlock_bh(&rt6_lock);
1658 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1659 struct in6_rtmsg *rtmsg)
1661 memset(rtmsg, 0, sizeof(*rtmsg));
1663 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1664 rtmsg->rtmsg_src_len = r->rtm_src_len;
1665 rtmsg->rtmsg_flags = RTF_UP;
1666 if (r->rtm_type == RTN_UNREACHABLE)
1667 rtmsg->rtmsg_flags |= RTF_REJECT;
1669 if (rta[RTA_GATEWAY-1]) {
1670 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1671 return -EINVAL;
1672 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1673 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1675 if (rta[RTA_DST-1]) {
1676 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1677 return -EINVAL;
1678 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1680 if (rta[RTA_SRC-1]) {
1681 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1682 return -EINVAL;
1683 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1685 if (rta[RTA_OIF-1]) {
1686 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1687 return -EINVAL;
1688 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1690 if (rta[RTA_PRIORITY-1]) {
1691 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1692 return -EINVAL;
1693 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1695 return 0;
1698 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1700 struct rtmsg *r = NLMSG_DATA(nlh);
1701 struct in6_rtmsg rtmsg;
1703 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1704 return -EINVAL;
1705 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1708 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1710 struct rtmsg *r = NLMSG_DATA(nlh);
1711 struct in6_rtmsg rtmsg;
1713 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1714 return -EINVAL;
1715 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/* Per-call context handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1724 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1725 struct in6_addr *dst, struct in6_addr *src,
1726 int iif, int type, u32 pid, u32 seq,
1727 int prefix, unsigned int flags)
1729 struct rtmsg *rtm;
1730 struct nlmsghdr *nlh;
1731 unsigned char *b = skb->tail;
1732 struct rta_cacheinfo ci;
1734 if (prefix) { /* user wants prefix routes only */
1735 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1736 /* success since this is not a prefix route */
1737 return 1;
1741 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1742 rtm = NLMSG_DATA(nlh);
1743 rtm->rtm_family = AF_INET6;
1744 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1745 rtm->rtm_src_len = rt->rt6i_src.plen;
1746 rtm->rtm_tos = 0;
1747 rtm->rtm_table = RT_TABLE_MAIN;
1748 if (rt->rt6i_flags&RTF_REJECT)
1749 rtm->rtm_type = RTN_UNREACHABLE;
1750 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1751 rtm->rtm_type = RTN_LOCAL;
1752 else
1753 rtm->rtm_type = RTN_UNICAST;
1754 rtm->rtm_flags = 0;
1755 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1756 rtm->rtm_protocol = rt->rt6i_protocol;
1757 if (rt->rt6i_flags&RTF_DYNAMIC)
1758 rtm->rtm_protocol = RTPROT_REDIRECT;
1759 else if (rt->rt6i_flags & RTF_ADDRCONF)
1760 rtm->rtm_protocol = RTPROT_KERNEL;
1761 else if (rt->rt6i_flags&RTF_DEFAULT)
1762 rtm->rtm_protocol = RTPROT_RA;
1764 if (rt->rt6i_flags&RTF_CACHE)
1765 rtm->rtm_flags |= RTM_F_CLONED;
1767 if (dst) {
1768 RTA_PUT(skb, RTA_DST, 16, dst);
1769 rtm->rtm_dst_len = 128;
1770 } else if (rtm->rtm_dst_len)
1771 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1772 #ifdef CONFIG_IPV6_SUBTREES
1773 if (src) {
1774 RTA_PUT(skb, RTA_SRC, 16, src);
1775 rtm->rtm_src_len = 128;
1776 } else if (rtm->rtm_src_len)
1777 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1778 #endif
1779 if (iif)
1780 RTA_PUT(skb, RTA_IIF, 4, &iif);
1781 else if (dst) {
1782 struct in6_addr saddr_buf;
1783 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1784 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1786 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1787 goto rtattr_failure;
1788 if (rt->u.dst.neighbour)
1789 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1790 if (rt->u.dst.dev)
1791 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1792 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1793 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1794 if (rt->rt6i_expires)
1795 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1796 else
1797 ci.rta_expires = 0;
1798 ci.rta_used = rt->u.dst.__use;
1799 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1800 ci.rta_error = rt->u.dst.error;
1801 ci.rta_id = 0;
1802 ci.rta_ts = 0;
1803 ci.rta_tsage = 0;
1804 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1805 nlh->nlmsg_len = skb->tail - b;
1806 return skb->len;
1808 nlmsg_failure:
1809 rtattr_failure:
1810 skb_trim(skb, b - skb->data);
1811 return -1;
1814 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1816 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1817 int prefix;
1819 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1820 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1821 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1822 } else
1823 prefix = 0;
1825 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1826 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1827 prefix, NLM_F_MULTI);
1830 static int fib6_dump_node(struct fib6_walker_t *w)
1832 int res;
1833 struct rt6_info *rt;
1835 for (rt = w->leaf; rt; rt = rt->u.next) {
1836 res = rt6_dump_route(rt, w->args);
1837 if (res < 0) {
1838 /* Frame is full, suspend walking */
1839 w->leaf = rt;
1840 return 1;
1842 BUG_TRAP(res!=0);
1844 w->leaf = NULL;
1845 return 0;
1848 static void fib6_dump_end(struct netlink_callback *cb)
1850 struct fib6_walker_t *w = (void*)cb->args[0];
1852 if (w) {
1853 cb->args[0] = 0;
1854 fib6_walker_unlink(w);
1855 kfree(w);
1857 cb->done = (void*)cb->args[1];
1858 cb->args[1] = 0;
1861 static int fib6_dump_done(struct netlink_callback *cb)
1863 fib6_dump_end(cb);
1864 return cb->done ? cb->done(cb) : 0;
1867 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1869 struct rt6_rtnl_dump_arg arg;
1870 struct fib6_walker_t *w;
1871 int res;
1873 arg.skb = skb;
1874 arg.cb = cb;
1876 w = (void*)cb->args[0];
1877 if (w == NULL) {
1878 /* New dump:
1880 * 1. hook callback destructor.
1882 cb->args[1] = (long)cb->done;
1883 cb->done = fib6_dump_done;
1886 * 2. allocate and initialize walker.
1888 w = kzalloc(sizeof(*w), GFP_ATOMIC);
1889 if (w == NULL)
1890 return -ENOMEM;
1891 RT6_TRACE("dump<%p", w);
1892 w->root = &ip6_routing_table;
1893 w->func = fib6_dump_node;
1894 w->args = &arg;
1895 cb->args[0] = (long)w;
1896 read_lock_bh(&rt6_lock);
1897 res = fib6_walk(w);
1898 read_unlock_bh(&rt6_lock);
1899 } else {
1900 w->args = &arg;
1901 read_lock_bh(&rt6_lock);
1902 res = fib6_walk_continue(w);
1903 read_unlock_bh(&rt6_lock);
1905 #if RT6_DEBUG >= 3
1906 if (res <= 0 && skb->len == 0)
1907 RT6_TRACE("%p>dump end\n", w);
1908 #endif
1909 res = res < 0 ? res : skb->len;
1910 /* res < 0 is an error. (really, impossible)
1911 res == 0 means that dump is complete, but skb still can contain data.
1912 res > 0 dump is not complete, but frame is full.
1914 /* Destroy walker, if dump of this table is complete. */
1915 if (res <= 0)
1916 fib6_dump_end(cb);
1917 return res;
1920 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1922 struct rtattr **rta = arg;
1923 int iif = 0;
1924 int err = -ENOBUFS;
1925 struct sk_buff *skb;
1926 struct flowi fl;
1927 struct rt6_info *rt;
1929 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1930 if (skb == NULL)
1931 goto out;
1933 /* Reserve room for dummy headers, this skb can pass
1934 through good chunk of routing engine.
1936 skb->mac.raw = skb->data;
1937 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1939 memset(&fl, 0, sizeof(fl));
1940 if (rta[RTA_SRC-1])
1941 ipv6_addr_copy(&fl.fl6_src,
1942 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1943 if (rta[RTA_DST-1])
1944 ipv6_addr_copy(&fl.fl6_dst,
1945 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1947 if (rta[RTA_IIF-1])
1948 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1950 if (iif) {
1951 struct net_device *dev;
1952 dev = __dev_get_by_index(iif);
1953 if (!dev) {
1954 err = -ENODEV;
1955 goto out_free;
1959 fl.oif = 0;
1960 if (rta[RTA_OIF-1])
1961 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1963 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1965 skb->dst = &rt->u.dst;
1967 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1968 err = rt6_fill_node(skb, rt,
1969 &fl.fl6_dst, &fl.fl6_src,
1970 iif,
1971 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1972 nlh->nlmsg_seq, 0, 0);
1973 if (err < 0) {
1974 err = -EMSGSIZE;
1975 goto out_free;
1978 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1979 if (err > 0)
1980 err = 0;
1981 out:
1982 return err;
1983 out_free:
1984 kfree_skb(skb);
1985 goto out;
1988 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1989 struct netlink_skb_parms *req)
1991 struct sk_buff *skb;
1992 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1993 u32 pid = current->pid;
1994 u32 seq = 0;
1996 if (req)
1997 pid = req->pid;
1998 if (nlh)
1999 seq = nlh->nlmsg_seq;
2001 skb = alloc_skb(size, gfp_any());
2002 if (!skb) {
2003 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
2004 return;
2006 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
2007 kfree_skb(skb);
2008 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
2009 return;
2011 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2012 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
2016 * /proc
2019 #ifdef CONFIG_PROC_FS
2021 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2032 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2034 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2035 int i;
2037 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2038 arg->skip++;
2039 return 0;
2042 if (arg->len >= arg->length)
2043 return 0;
2045 for (i=0; i<16; i++) {
2046 sprintf(arg->buffer + arg->len, "%02x",
2047 rt->rt6i_dst.addr.s6_addr[i]);
2048 arg->len += 2;
2050 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2051 rt->rt6i_dst.plen);
2053 #ifdef CONFIG_IPV6_SUBTREES
2054 for (i=0; i<16; i++) {
2055 sprintf(arg->buffer + arg->len, "%02x",
2056 rt->rt6i_src.addr.s6_addr[i]);
2057 arg->len += 2;
2059 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2060 rt->rt6i_src.plen);
2061 #else
2062 sprintf(arg->buffer + arg->len,
2063 "00000000000000000000000000000000 00 ");
2064 arg->len += 36;
2065 #endif
2067 if (rt->rt6i_nexthop) {
2068 for (i=0; i<16; i++) {
2069 sprintf(arg->buffer + arg->len, "%02x",
2070 rt->rt6i_nexthop->primary_key[i]);
2071 arg->len += 2;
2073 } else {
2074 sprintf(arg->buffer + arg->len,
2075 "00000000000000000000000000000000");
2076 arg->len += 32;
2078 arg->len += sprintf(arg->buffer + arg->len,
2079 " %08x %08x %08x %08x %8s\n",
2080 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2081 rt->u.dst.__use, rt->rt6i_flags,
2082 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2083 return 0;
2086 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2088 struct rt6_proc_arg arg;
2089 arg.buffer = buffer;
2090 arg.offset = offset;
2091 arg.length = length;
2092 arg.skip = 0;
2093 arg.len = 0;
2095 read_lock_bh(&rt6_lock);
2096 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2097 read_unlock_bh(&rt6_lock);
2099 *start = buffer;
2100 if (offset)
2101 *start += offset % RT6_INFO_LEN;
2103 arg.len -= offset % RT6_INFO_LEN;
2105 if (arg.len > length)
2106 arg.len = length;
2107 if (arg.len < 0)
2108 arg.len = 0;
2110 return arg.len;
2113 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2115 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2116 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2117 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2118 rt6_stats.fib_rt_cache,
2119 atomic_read(&ip6_dst_ops.entries),
2120 rt6_stats.fib_discarded_routes);
2122 return 0;
2125 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2127 return single_open(file, rt6_stats_seq_show, NULL);
2130 static struct file_operations rt6_stats_seq_fops = {
2131 .owner = THIS_MODULE,
2132 .open = rt6_stats_seq_open,
2133 .read = seq_read,
2134 .llseek = seq_lseek,
2135 .release = single_release,
2137 #endif /* CONFIG_PROC_FS */
2139 #ifdef CONFIG_SYSCTL
2141 static int flush_delay;
2143 static
2144 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2145 void __user *buffer, size_t *lenp, loff_t *ppos)
2147 if (write) {
2148 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2149 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2150 return 0;
2151 } else
2152 return -EINVAL;
2155 ctl_table ipv6_route_table[] = {
2157 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2158 .procname = "flush",
2159 .data = &flush_delay,
2160 .maxlen = sizeof(int),
2161 .mode = 0200,
2162 .proc_handler = &ipv6_sysctl_rtcache_flush
2165 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2166 .procname = "gc_thresh",
2167 .data = &ip6_dst_ops.gc_thresh,
2168 .maxlen = sizeof(int),
2169 .mode = 0644,
2170 .proc_handler = &proc_dointvec,
2173 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2174 .procname = "max_size",
2175 .data = &ip6_rt_max_size,
2176 .maxlen = sizeof(int),
2177 .mode = 0644,
2178 .proc_handler = &proc_dointvec,
2181 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2182 .procname = "gc_min_interval",
2183 .data = &ip6_rt_gc_min_interval,
2184 .maxlen = sizeof(int),
2185 .mode = 0644,
2186 .proc_handler = &proc_dointvec_jiffies,
2187 .strategy = &sysctl_jiffies,
2190 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2191 .procname = "gc_timeout",
2192 .data = &ip6_rt_gc_timeout,
2193 .maxlen = sizeof(int),
2194 .mode = 0644,
2195 .proc_handler = &proc_dointvec_jiffies,
2196 .strategy = &sysctl_jiffies,
2199 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2200 .procname = "gc_interval",
2201 .data = &ip6_rt_gc_interval,
2202 .maxlen = sizeof(int),
2203 .mode = 0644,
2204 .proc_handler = &proc_dointvec_jiffies,
2205 .strategy = &sysctl_jiffies,
2208 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2209 .procname = "gc_elasticity",
2210 .data = &ip6_rt_gc_elasticity,
2211 .maxlen = sizeof(int),
2212 .mode = 0644,
2213 .proc_handler = &proc_dointvec_jiffies,
2214 .strategy = &sysctl_jiffies,
2217 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2218 .procname = "mtu_expires",
2219 .data = &ip6_rt_mtu_expires,
2220 .maxlen = sizeof(int),
2221 .mode = 0644,
2222 .proc_handler = &proc_dointvec_jiffies,
2223 .strategy = &sysctl_jiffies,
2226 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2227 .procname = "min_adv_mss",
2228 .data = &ip6_rt_min_advmss,
2229 .maxlen = sizeof(int),
2230 .mode = 0644,
2231 .proc_handler = &proc_dointvec_jiffies,
2232 .strategy = &sysctl_jiffies,
2235 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2236 .procname = "gc_min_interval_ms",
2237 .data = &ip6_rt_gc_min_interval,
2238 .maxlen = sizeof(int),
2239 .mode = 0644,
2240 .proc_handler = &proc_dointvec_ms_jiffies,
2241 .strategy = &sysctl_ms_jiffies,
2243 { .ctl_name = 0 }
2246 #endif
2248 void __init ip6_route_init(void)
2250 struct proc_dir_entry *p;
2252 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2253 sizeof(struct rt6_info),
2254 0, SLAB_HWCACHE_ALIGN,
2255 NULL, NULL);
2256 if (!ip6_dst_ops.kmem_cachep)
2257 panic("cannot create ip6_dst_cache");
2259 fib6_init();
2260 #ifdef CONFIG_PROC_FS
2261 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2262 if (p)
2263 p->owner = THIS_MODULE;
2265 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2266 #endif
2267 #ifdef CONFIG_XFRM
2268 xfrm6_init();
2269 #endif
2272 void ip6_route_cleanup(void)
2274 #ifdef CONFIG_PROC_FS
2275 proc_net_remove("ipv6_route");
2276 proc_net_remove("rt6_stats");
2277 #endif
2278 #ifdef CONFIG_XFRM
2279 xfrm6_fini();
2280 #endif
2281 rt6_ifdown(NULL);
2282 fib6_gc_cleanup();
2283 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);