2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <asm/uaccess.h>
17 #include <asm/system.h>
18 #include <linux/bitops.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/jiffies.h>
23 #include <linux/string.h>
24 #include <linux/socket.h>
25 #include <linux/sockios.h>
26 #include <linux/errno.h>
28 #include <linux/inet.h>
29 #include <linux/inetdevice.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/proc_fs.h>
33 #include <linux/skbuff.h>
34 #include <linux/init.h>
38 #include <net/protocol.h>
39 #include <net/route.h>
42 #include <net/ip_fib.h>
43 #include <net/netlink.h>
44 #include <net/nexthop.h>
46 #include "fib_lookup.h"
/* Serializes all updates to the fib_info hash tables below. */
48 static DEFINE_SPINLOCK(fib_info_lock
);
/* Hash of every fib_info, bucketed by fib_info_hashfn(). */
49 static struct hlist_head
*fib_info_hash
;
/* Hash of fib_infos that carry a preferred source address,
 * bucketed by fib_laddr_hashfn(fi->fib_prefsrc). */
50 static struct hlist_head
*fib_info_laddrhash
;
/* Current bucket count of the two tables above; doubled in
 * fib_create_info() when fib_info_cnt reaches it. */
51 static unsigned int fib_hash_size
;
/* Number of live fib_info objects. */
52 static unsigned int fib_info_cnt
;
54 #define DEVINDEX_HASHBITS 8
55 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
/* Fixed-size hash of nexthops keyed by output device ifindex
 * (see fib_devindex_hashfn()); used by ip_fib_check_default()
 * and the fib_sync_* device-event handlers. */
56 static struct hlist_head fib_info_devhash
[DEVINDEX_HASHSIZE
];
58 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Guards the nh_power/fib_power accounting used by
 * fib_select_multipath() and fib_sync_down_dev()/fib_sync_up(). */
60 static DEFINE_SPINLOCK(fib_multipath_lock
);
/* Iterate read-only over all nexthops of (fi) as 'nh'/'nhsel'.
 * Opens a scope; must be closed with endfor_nexthops(fi). */
62 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
63 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
/* Same, but iterates with a writable 'nexthop_nh' pointer. */
65 #define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \
66 for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++)
68 #else /* CONFIG_IP_ROUTE_MULTIPATH */
70 /* Hope, that gcc will optimize it to get rid of dummy loop */
/* Non-multipath build: exactly one nexthop, loop runs once. */
72 #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \
73 for (nhsel=0; nhsel < 1; nhsel++)
75 #define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
76 for (nhsel=0; nhsel < 1; nhsel++)
78 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
/* Closes the scope opened by for_nexthops()/change_nexthops(). */
80 #define endfor_nexthops(fi) }
/* Per-route-type property table, indexed by RTN_* type: the scope
 * each type requires and (where set) the error code it yields.
 * NOTE(review): the struct header and several entries (including most
 * .error initializers and the leading RTN_* entries) are missing from
 * this extract — verify against the full file. */
87 } fib_props
[RTN_MAX
+ 1] = {
90 .scope
= RT_SCOPE_NOWHERE
,
94 .scope
= RT_SCOPE_UNIVERSE
,
98 .scope
= RT_SCOPE_HOST
,
102 .scope
= RT_SCOPE_LINK
,
103 }, /* RTN_BROADCAST */
106 .scope
= RT_SCOPE_LINK
,
110 .scope
= RT_SCOPE_UNIVERSE
,
111 }, /* RTN_MULTICAST */
114 .scope
= RT_SCOPE_UNIVERSE
,
115 }, /* RTN_BLACKHOLE */
117 .error
= -EHOSTUNREACH
,
118 .scope
= RT_SCOPE_UNIVERSE
,
119 }, /* RTN_UNREACHABLE */
122 .scope
= RT_SCOPE_UNIVERSE
,
123 }, /* RTN_PROHIBIT */
126 .scope
= RT_SCOPE_UNIVERSE
,
130 .scope
= RT_SCOPE_NOWHERE
,
134 .scope
= RT_SCOPE_NOWHERE
,
135 }, /* RTN_XRESOLVE */
139 /* Release a nexthop info record */
/* Free a fib_info: warn if it is still marked alive, drop every
 * nexthop's device reference, and release the netns reference taken
 * in fib_create_info(). NOTE(review): the early return after the
 * warning and the final kfree are not visible in this extract. */
141 void free_fib_info(struct fib_info
*fi
)
/* A fib_info must be marked dead before it may be freed. */
143 if (fi
->fib_dead
== 0) {
144 printk(KERN_WARNING
"Freeing alive fib_info %p\n", fi
);
/* Drop the dev_hold() taken on each nexthop's device. */
147 change_nexthops(fi
) {
148 if (nexthop_nh
->nh_dev
)
149 dev_put(nexthop_nh
->nh_dev
);
150 nexthop_nh
->nh_dev
= NULL
;
151 } endfor_nexthops(fi
);
/* Balance the hold_net() done when the fib_info was created. */
153 release_net(fi
->fib_net
);
/* Drop one tree reference on fi; on the last reference, unlink the
 * fib_info from the info/laddr hashes and each nexthop from the
 * devhash, all under fib_info_lock. */
157 void fib_release_info(struct fib_info
*fi
)
159 spin_lock_bh(&fib_info_lock
);
160 if (fi
&& --fi
->fib_treeref
== 0) {
161 hlist_del(&fi
->fib_hash
);
/* Presumably only done when fi->fib_prefsrc is set — the guarding
 * condition is not visible in this extract. */
163 hlist_del(&fi
->fib_lhash
);
164 change_nexthops(fi
) {
/* Nexthops without a device were never added to the devhash. */
165 if (!nexthop_nh
->nh_dev
)
167 hlist_del(&nexthop_nh
->nh_hash
);
168 } endfor_nexthops(fi
)
172 spin_unlock_bh(&fib_info_lock
);
/* Compare the nexthop lists of fi and ofi field by field (oif, gw,
 * scope, weight/tclassid when configured, and flags ignoring
 * RTNH_F_DEAD). Used by fib_find_info() to detect duplicates.
 * NOTE(review): the return statements are not visible in this
 * extract; by the caller's usage, 0 means "equal". */
175 static __inline__
int nh_comp(const struct fib_info
*fi
, const struct fib_info
*ofi
)
177 const struct fib_nh
*onh
= ofi
->fib_nh
;
180 if (nh
->nh_oif
!= onh
->nh_oif
||
181 nh
->nh_gw
!= onh
->nh_gw
||
182 nh
->nh_scope
!= onh
->nh_scope
||
183 #ifdef CONFIG_IP_ROUTE_MULTIPATH
184 nh
->nh_weight
!= onh
->nh_weight
||
186 #ifdef CONFIG_NET_CLS_ROUTE
187 nh
->nh_tclassid
!= onh
->nh_tclassid
||
/* Flags are compared modulo RTNH_F_DEAD: liveness does not make
 * two nexthops different. */
189 ((nh
->nh_flags
^onh
->nh_flags
)&~RTNH_F_DEAD
))
192 } endfor_nexthops(fi
);
/* Fold a device ifindex into a fib_info_devhash bucket index by
 * xoring three byte-wide slices of the value. */
196 static inline unsigned int fib_devindex_hashfn(unsigned int val
)
198 unsigned int mask
= DEVINDEX_HASHSIZE
- 1;
201 (val
>> DEVINDEX_HASHBITS
) ^
202 (val
>> (DEVINDEX_HASHBITS
* 2))) & mask
;
/* Hash a fib_info over the fields fib_find_info() compares:
 * nexthop count, protocol, prefsrc, priority and each nexthop's
 * oif. Masked to the current fib_info_hash size. */
205 static inline unsigned int fib_info_hashfn(const struct fib_info
*fi
)
207 unsigned int mask
= (fib_hash_size
- 1);
208 unsigned int val
= fi
->fib_nhs
;
210 val
^= fi
->fib_protocol
;
211 val
^= (__force u32
)fi
->fib_prefsrc
;
212 val
^= fi
->fib_priority
;
/* Mix in every nexthop's output interface. */
214 val
^= fib_devindex_hashfn(nh
->nh_oif
);
215 } endfor_nexthops(fi
)
217 return (val
^ (val
>> 7) ^ (val
>> 12)) & mask
;
/* Look up an already-registered fib_info equal to *nfi: same netns,
 * nexthop count, protocol, prefsrc, priority, metrics, flags
 * (ignoring RTNH_F_DEAD) and nexthop list. Returns the match or
 * NULL (return statements not visible in this extract). */
220 static struct fib_info
*fib_find_info(const struct fib_info
*nfi
)
222 struct hlist_head
*head
;
223 struct hlist_node
*node
;
227 hash
= fib_info_hashfn(nfi
);
228 head
= &fib_info_hash
[hash
];
230 hlist_for_each_entry(fi
, node
, head
, fib_hash
) {
/* Candidates from other network namespaces never match. */
231 if (!net_eq(fi
->fib_net
, nfi
->fib_net
))
233 if (fi
->fib_nhs
!= nfi
->fib_nhs
)
235 if (nfi
->fib_protocol
== fi
->fib_protocol
&&
236 nfi
->fib_prefsrc
== fi
->fib_prefsrc
&&
237 nfi
->fib_priority
== fi
->fib_priority
&&
238 memcmp(nfi
->fib_metrics
, fi
->fib_metrics
,
239 sizeof(fi
->fib_metrics
)) == 0 &&
240 ((nfi
->fib_flags
^fi
->fib_flags
)&~RTNH_F_DEAD
) == 0 &&
/* With no nexthops there is nothing more to compare. */
241 (nfi
->fib_nhs
== 0 || nh_comp(fi
, nfi
) == 0))
248 /* Check, that the gateway is already configured.
249 Used only by redirect accept routine.
/* Scan the devhash bucket for dev looking for a live (non-dead)
 * nexthop whose gateway matches gw. NOTE(review): the gateway
 * comparison and return statements are not visible in this
 * extract. */
252 int ip_fib_check_default(__be32 gw
, struct net_device
*dev
)
254 struct hlist_head
*head
;
255 struct hlist_node
*node
;
/* Plain spin_lock (not _bh): presumably caller context makes that
 * safe — verify against the full file. */
259 spin_lock(&fib_info_lock
);
261 hash
= fib_devindex_hashfn(dev
->ifindex
);
262 head
= &fib_info_devhash
[hash
];
263 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
264 if (nh
->nh_dev
== dev
&&
266 !(nh
->nh_flags
&RTNH_F_DEAD
)) {
267 spin_unlock(&fib_info_lock
);
272 spin_unlock(&fib_info_lock
);
/* Upper-bound the netlink message size fib_dump_info() may emit for
 * this fib_info: rtmsg header, fixed attributes, nested metrics and
 * (if any) the nested nexthop list. */
277 static inline size_t fib_nlmsg_size(struct fib_info
*fi
)
279 size_t payload
= NLMSG_ALIGN(sizeof(struct rtmsg
))
280 + nla_total_size(4) /* RTA_TABLE */
281 + nla_total_size(4) /* RTA_DST */
282 + nla_total_size(4) /* RTA_PRIORITY */
283 + nla_total_size(4); /* RTA_PREFSRC */
285 /* space for nested metrics */
286 payload
+= nla_total_size((RTAX_MAX
* nla_total_size(4)));
289 /* Also handles the special case fib_nhs == 1 */
291 /* each nexthop is packed in an attribute */
292 size_t nhsize
= nla_total_size(sizeof(struct rtnexthop
));
294 /* may contain flow and gateway attribute */
295 nhsize
+= 2 * nla_total_size(4);
297 /* all nexthops are packed in a nested attribute */
298 payload
+= nla_total_size(fi
->fib_nhs
* nhsize
);
/* Build a route netlink message for 'event' (new/del route) and
 * notify RTNLGRP_IPV4_ROUTE listeners; on failure report the error
 * to the group via rtnl_set_sk_err(). */
304 void rtmsg_fib(int event
, __be32 key
, struct fib_alias
*fa
,
305 int dst_len
, u32 tb_id
, struct nl_info
*info
,
306 unsigned int nlm_flags
)
/* Echo the requester's sequence number when the event was
 * triggered by a netlink request. */
309 u32 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
/* Size the skb with fib_nlmsg_size(); allocation-failure path is
 * not visible in this extract. */
312 skb
= nlmsg_new(fib_nlmsg_size(fa
->fa_info
), GFP_KERNEL
);
316 err
= fib_dump_info(skb
, info
->pid
, seq
, event
, tb_id
,
317 fa
->fa_type
, fa
->fa_scope
, key
, dst_len
,
318 fa
->fa_tos
, fa
->fa_info
, nlm_flags
);
320 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
321 WARN_ON(err
== -EMSGSIZE
);
325 rtnl_notify(skb
, info
->nl_net
, info
->pid
, RTNLGRP_IPV4_ROUTE
,
326 info
->nlh
, GFP_KERNEL
);
/* Error path: propagate the failure to group subscribers. */
330 rtnl_set_sk_err(info
->nl_net
, RTNLGRP_IPV4_ROUTE
, err
);
333 /* Return the first fib alias matching TOS with
334 * priority less than or equal to PRIO.
/* Walks fah in order; aliases are presumably sorted by decreasing
 * tos then increasing priority — the loop body's continue/return
 * lines are not visible in this extract. */
336 struct fib_alias
*fib_find_alias(struct list_head
*fah
, u8 tos
, u32 prio
)
339 struct fib_alias
*fa
;
340 list_for_each_entry(fa
, fah
, fa_list
) {
341 if (fa
->fa_tos
> tos
)
343 if (fa
->fa_info
->fib_priority
>= prio
||
/* Decide, from the ARP neighbour state of the first nexthop
 * gateway, whether a default route candidate is usable; tracks the
 * best "last resort" route in *last_resort / *last_idx.
 * NOTE(review): return statements and neigh_release() are not
 * visible in this extract. */
351 int fib_detect_death(struct fib_info
*fi
, int order
,
352 struct fib_info
**last_resort
, int *last_idx
, int dflt
)
355 int state
= NUD_NONE
;
/* Look up the ARP entry for the first nexthop's gateway. */
357 n
= neigh_lookup(&arp_tbl
, &fi
->fib_nh
[0].nh_gw
, fi
->fib_dev
);
359 state
= n
->nud_state
;
/* Reachable neighbour: route is definitely alive. */
362 if (state
== NUD_REACHABLE
)
364 if ((state
&NUD_VALID
) && order
!= dflt
)
366 if ((state
&NUD_VALID
) ||
367 (*last_idx
<0 && order
> dflt
)) {
374 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Count the rtnexthop entries in an RTA_MULTIPATH payload.
 * Returns 0 when trailing bytes remain (malformed config); the
 * nhs counter's declaration/increment are not visible here. */
376 static int fib_count_nexthops(struct rtnexthop
*rtnh
, int remaining
)
380 while (rtnh_ok(rtnh
, remaining
)) {
382 rtnh
= rtnh_next(rtnh
, &remaining
);
385 /* leftover implies invalid nexthop configuration, discard it */
386 return remaining
> 0 ? 0 : nhs
;
/* Populate fi's nexthop array from an RTA_MULTIPATH rtnexthop
 * stream: per-nexthop flags, oif, weight, and optional RTA_GATEWAY
 * / RTA_FLOW attributes. */
389 static int fib_get_nhs(struct fib_info
*fi
, struct rtnexthop
*rtnh
,
390 int remaining
, struct fib_config
*cfg
)
392 change_nexthops(fi
) {
/* Ran out of valid rtnexthop entries before filling all slots. */
395 if (!rtnh_ok(rtnh
, remaining
))
/* Upper flag bits come from the route config, low byte from the
 * per-nexthop rtnh_flags. */
398 nexthop_nh
->nh_flags
=
399 (cfg
->fc_flags
& ~0xFF) | rtnh
->rtnh_flags
;
400 nexthop_nh
->nh_oif
= rtnh
->rtnh_ifindex
;
/* rtnh_hops is weight-1 on the wire. */
401 nexthop_nh
->nh_weight
= rtnh
->rtnh_hops
+ 1;
403 attrlen
= rtnh_attrlen(rtnh
);
405 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
407 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
408 nexthop_nh
->nh_gw
= nla
? nla_get_be32(nla
) : 0;
409 #ifdef CONFIG_NET_CLS_ROUTE
410 nla
= nla_find(attrs
, attrlen
, RTA_FLOW
);
411 nexthop_nh
->nh_tclassid
= nla
? nla_get_u32(nla
) : 0;
415 rtnh
= rtnh_next(rtnh
, &remaining
);
416 } endfor_nexthops(fi
);
/* Check whether the route configuration cfg matches fi's nexthops:
 * priority, then either the single oif/gw pair, or — for multipath
 * requests — every rtnexthop entry (ifindex, gateway, flow).
 * NOTE(review): the return statements are not visible in this
 * extract. */
423 int fib_nh_match(struct fib_config
*cfg
, struct fib_info
*fi
)
425 #ifdef CONFIG_IP_ROUTE_MULTIPATH
426 struct rtnexthop
*rtnh
;
/* A configured priority must match exactly. */
430 if (cfg
->fc_priority
&& cfg
->fc_priority
!= fi
->fib_priority
)
/* Single-nexthop match on device and/or gateway. */
433 if (cfg
->fc_oif
|| cfg
->fc_gw
) {
434 if ((!cfg
->fc_oif
|| cfg
->fc_oif
== fi
->fib_nh
->nh_oif
) &&
435 (!cfg
->fc_gw
|| cfg
->fc_gw
== fi
->fib_nh
->nh_gw
))
440 #ifdef CONFIG_IP_ROUTE_MULTIPATH
441 if (cfg
->fc_mp
== NULL
)
445 remaining
= cfg
->fc_mp_len
;
/* Walk cfg's rtnexthop list in lockstep with fi's nexthops. */
450 if (!rtnh_ok(rtnh
, remaining
))
453 if (rtnh
->rtnh_ifindex
&& rtnh
->rtnh_ifindex
!= nh
->nh_oif
)
456 attrlen
= rtnh_attrlen(rtnh
);
458 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
460 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
461 if (nla
&& nla_get_be32(nla
) != nh
->nh_gw
)
463 #ifdef CONFIG_NET_CLS_ROUTE
464 nla
= nla_find(attrs
, attrlen
, RTA_FLOW
);
465 if (nla
&& nla_get_u32(nla
) != nh
->nh_tclassid
)
470 rtnh
= rtnh_next(rtnh
, &remaining
);
471 } endfor_nexthops(fi
);
481 Semantics of nexthop is very messy by historical reasons.
482 We have to take into account, that:
483 a) gateway can be actually local interface address,
484 so that gatewayed route is direct.
485 b) gateway must be on-link address, possibly
486 described not by an ifaddr, but also by a direct route.
487 c) If both gateway and interface are specified, they should not
489 d) If we use tunnel routes, gateway could be not on-link.
491 Attempt to reconcile all of these (alas, self-contradictory) conditions
492 results in pretty ugly and hairy code with obscure logic.
494 I chose to generalize it instead, so that the size
495 of code does not increase practically, but it becomes
497 Every prefix is assigned a "scope" value: "host" is local address,
498 "link" is direct route,
499 [ ... "site" ... "interior" ... ]
500 and "universe" is true gateway route with global meaning.
502 Every prefix refers to a set of "nexthop"s (gw, oif),
503 where gw must have narrower scope. This recursion stops
504 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
505 which means that gw is forced to be on link.
507 Code is still hairy, but now it is apparently logically
508 consistent and very flexible. F.e. as by-product it allows
509 to co-exist in peace independent exterior and interior
512 Normally it looks as following.
514 {universe prefix} -> (gw, oif) [scope link]
516 |-> {link prefix} -> (gw, oif) [scope local]
518 |-> {local prefix} (terminal node)
/* Validate one nexthop of a route being added and resolve its
 * device and scope. Two cases: RTNH_F_ONLINK forces the gateway to
 * be on-link via the given oif; otherwise the gateway is resolved
 * with fib_lookup() at a narrower scope (see the long comment
 * above). A gateway-less nexthop just needs an UP device.
 * NOTE(review): error returns and some declarations (flowi setup,
 * the gateway-less branch header) are not visible in this
 * extract. */
521 static int fib_check_nh(struct fib_config
*cfg
, struct fib_info
*fi
,
527 net
= cfg
->fc_nlinfo
.nl_net
;
529 struct fib_result res
;
/* Case 1: nexthop explicitly declared on-link. */
531 if (nh
->nh_flags
&RTNH_F_ONLINK
) {
532 struct net_device
*dev
;
/* ONLINK only makes sense for routes wider than link scope. */
534 if (cfg
->fc_scope
>= RT_SCOPE_LINK
)
536 if (inet_addr_type(net
, nh
->nh_gw
) != RTN_UNICAST
)
538 if ((dev
= __dev_get_by_index(net
, nh
->nh_oif
)) == NULL
)
540 if (!(dev
->flags
&IFF_UP
))
544 nh
->nh_scope
= RT_SCOPE_LINK
;
/* Case 2: resolve the gateway via a FIB lookup at a scope one
 * narrower than the route being added. */
552 .scope
= cfg
->fc_scope
+ 1,
558 /* It is not necessary, but requires a bit of thinking */
559 if (fl
.fl4_scope
< RT_SCOPE_LINK
)
560 fl
.fl4_scope
= RT_SCOPE_LINK
;
561 if ((err
= fib_lookup(net
, &fl
, &res
)) != 0)
/* The gateway must resolve to a unicast or local route. */
565 if (res
.type
!= RTN_UNICAST
&& res
.type
!= RTN_LOCAL
)
567 nh
->nh_scope
= res
.scope
;
568 nh
->nh_oif
= FIB_RES_OIF(res
);
569 if ((nh
->nh_dev
= FIB_RES_DEV(res
)) == NULL
)
571 dev_hold(nh
->nh_dev
);
573 if (!(nh
->nh_dev
->flags
& IFF_UP
))
/* Case 3 (no gateway): bind directly to the output device. */
580 struct in_device
*in_dev
;
/* These flags require a gateway and are invalid here. */
582 if (nh
->nh_flags
&(RTNH_F_PERVASIVE
|RTNH_F_ONLINK
))
585 in_dev
= inetdev_by_index(net
, nh
->nh_oif
);
588 if (!(in_dev
->dev
->flags
&IFF_UP
)) {
592 nh
->nh_dev
= in_dev
->dev
;
593 dev_hold(nh
->nh_dev
);
594 nh
->nh_scope
= RT_SCOPE_HOST
;
/* Hash a preferred-source address into fib_info_laddrhash,
 * xoring shifted slices of the 32-bit address. */
600 static inline unsigned int fib_laddr_hashfn(__be32 val
)
602 unsigned int mask
= (fib_hash_size
- 1);
604 return ((__force u32
)val
^ ((__force u32
)val
>> 7) ^ ((__force u32
)val
>> 14)) & mask
;
/* Allocate a zeroed hash-table array: kzalloc for up to a page,
 * whole zeroed pages beyond that (mirrored by fib_hash_free()). */
607 static struct hlist_head
*fib_hash_alloc(int bytes
)
609 if (bytes
<= PAGE_SIZE
)
610 return kzalloc(bytes
, GFP_KERNEL
);
612 return (struct hlist_head
*)
613 __get_free_pages(GFP_KERNEL
| __GFP_ZERO
, get_order(bytes
));
/* Free a table from fib_hash_alloc(), choosing kfree or
 * free_pages to match how it was allocated. NOTE(review): the
 * kfree branch body and any NULL guard are not visible in this
 * extract. */
616 static void fib_hash_free(struct hlist_head
*hash
, int bytes
)
621 if (bytes
<= PAGE_SIZE
)
624 free_pages((unsigned long) hash
, get_order(bytes
));
/* Rehash every fib_info from the current tables into the freshly
 * allocated, larger new_info_hash/new_laddrhash (new_size buckets),
 * swap the table pointers under fib_info_lock, then free the old
 * arrays. */
627 static void fib_hash_move(struct hlist_head
*new_info_hash
,
628 struct hlist_head
*new_laddrhash
,
629 unsigned int new_size
)
631 struct hlist_head
*old_info_hash
, *old_laddrhash
;
632 unsigned int old_size
= fib_hash_size
;
633 unsigned int i
, bytes
;
635 spin_lock_bh(&fib_info_lock
);
636 old_info_hash
= fib_info_hash
;
637 old_laddrhash
= fib_info_laddrhash
;
/* fib_hash_size must change first: the hash functions mask with
 * it, so rehashing below already targets the new size. */
638 fib_hash_size
= new_size
;
/* Pass 1: move every entry of the main info hash. */
640 for (i
= 0; i
< old_size
; i
++) {
641 struct hlist_head
*head
= &fib_info_hash
[i
];
642 struct hlist_node
*node
, *n
;
/* _safe variant: entries are unlinked while iterating. */
645 hlist_for_each_entry_safe(fi
, node
, n
, head
, fib_hash
) {
646 struct hlist_head
*dest
;
647 unsigned int new_hash
;
649 hlist_del(&fi
->fib_hash
);
651 new_hash
= fib_info_hashfn(fi
);
652 dest
= &new_info_hash
[new_hash
];
653 hlist_add_head(&fi
->fib_hash
, dest
);
656 fib_info_hash
= new_info_hash
;
/* Pass 2: same for the prefsrc (laddr) hash. */
658 for (i
= 0; i
< old_size
; i
++) {
659 struct hlist_head
*lhead
= &fib_info_laddrhash
[i
];
660 struct hlist_node
*node
, *n
;
663 hlist_for_each_entry_safe(fi
, node
, n
, lhead
, fib_lhash
) {
664 struct hlist_head
*ldest
;
665 unsigned int new_hash
;
667 hlist_del(&fi
->fib_lhash
);
669 new_hash
= fib_laddr_hashfn(fi
->fib_prefsrc
);
670 ldest
= &new_laddrhash
[new_hash
];
671 hlist_add_head(&fi
->fib_lhash
, ldest
);
674 fib_info_laddrhash
= new_laddrhash
;
676 spin_unlock_bh(&fib_info_lock
);
/* Old arrays are freed outside the lock. */
678 bytes
= old_size
* sizeof(struct hlist_head
*);
679 fib_hash_free(old_info_hash
, bytes
);
680 fib_hash_free(old_laddrhash
, bytes
);
/* Build a fib_info from a route configuration: validate scope/type,
 * grow the hash tables if needed, allocate the fib_info with its
 * trailing nexthop array, parse metrics and nexthops, validate each
 * nexthop via fib_check_nh(), reuse an identical existing fib_info
 * when one is found, and otherwise link the new one into all hash
 * tables. NOTE(review): many error-label lines, returns and some
 * declarations are missing from this extract. */
683 struct fib_info
*fib_create_info(struct fib_config
*cfg
)
686 struct fib_info
*fi
= NULL
;
687 struct fib_info
*ofi
;
689 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
691 /* Fast check to catch the most weird cases */
692 if (fib_props
[cfg
->fc_type
].scope
> cfg
->fc_scope
)
695 #ifdef CONFIG_IP_ROUTE_MULTIPATH
697 nhs
= fib_count_nexthops(cfg
->fc_mp
, cfg
->fc_mp_len
);
/* Grow the hash tables before they get denser than 1 entry per
 * bucket on average. */
704 if (fib_info_cnt
>= fib_hash_size
) {
705 unsigned int new_size
= fib_hash_size
<< 1;
706 struct hlist_head
*new_info_hash
;
707 struct hlist_head
*new_laddrhash
;
712 bytes
= new_size
* sizeof(struct hlist_head
*);
713 new_info_hash
= fib_hash_alloc(bytes
);
714 new_laddrhash
= fib_hash_alloc(bytes
);
/* Partial allocation: free whichever succeeded and keep the old
 * (smaller) tables. */
715 if (!new_info_hash
|| !new_laddrhash
) {
716 fib_hash_free(new_info_hash
, bytes
);
717 fib_hash_free(new_laddrhash
, bytes
);
719 fib_hash_move(new_info_hash
, new_laddrhash
, new_size
);
/* fib_nh entries are allocated inline after the fib_info. */
725 fi
= kzalloc(sizeof(*fi
)+nhs
*sizeof(struct fib_nh
), GFP_KERNEL
);
730 fi
->fib_net
= hold_net(net
);
731 fi
->fib_protocol
= cfg
->fc_protocol
;
732 fi
->fib_flags
= cfg
->fc_flags
;
733 fi
->fib_priority
= cfg
->fc_priority
;
734 fi
->fib_prefsrc
= cfg
->fc_prefsrc
;
737 change_nexthops(fi
) {
738 nexthop_nh
->nh_parent
= fi
;
739 } endfor_nexthops(fi
)
/* Copy RTA_METRICS attributes into fib_metrics (1-based types). */
745 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
746 int type
= nla_type(nla
);
751 fi
->fib_metrics
[type
- 1] = nla_get_u32(nla
);
757 #ifdef CONFIG_IP_ROUTE_MULTIPATH
758 err
= fib_get_nhs(fi
, cfg
->fc_mp
, cfg
->fc_mp_len
, cfg
);
/* Multipath config must agree with any top-level oif/gw/flow. */
761 if (cfg
->fc_oif
&& fi
->fib_nh
->nh_oif
!= cfg
->fc_oif
)
763 if (cfg
->fc_gw
&& fi
->fib_nh
->nh_gw
!= cfg
->fc_gw
)
765 #ifdef CONFIG_NET_CLS_ROUTE
766 if (cfg
->fc_flow
&& fi
->fib_nh
->nh_tclassid
!= cfg
->fc_flow
)
/* Single-nexthop path: fill the one fib_nh from cfg. */
773 struct fib_nh
*nh
= fi
->fib_nh
;
775 nh
->nh_oif
= cfg
->fc_oif
;
776 nh
->nh_gw
= cfg
->fc_gw
;
777 nh
->nh_flags
= cfg
->fc_flags
;
778 #ifdef CONFIG_NET_CLS_ROUTE
779 nh
->nh_tclassid
= cfg
->fc_flow
;
781 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Route types with an error code (blackhole etc.) take no
 * gateway, device or nexthop list. */
786 if (fib_props
[cfg
->fc_type
].error
) {
787 if (cfg
->fc_gw
|| cfg
->fc_oif
|| cfg
->fc_mp
)
792 if (cfg
->fc_scope
> RT_SCOPE_HOST
)
795 if (cfg
->fc_scope
== RT_SCOPE_HOST
) {
796 struct fib_nh
*nh
= fi
->fib_nh
;
798 /* Local address is added. */
799 if (nhs
!= 1 || nh
->nh_gw
)
801 nh
->nh_scope
= RT_SCOPE_NOWHERE
;
802 nh
->nh_dev
= dev_get_by_index(net
, fi
->fib_nh
->nh_oif
);
804 if (nh
->nh_dev
== NULL
)
/* Wider-scope routes: validate every nexthop. */
807 change_nexthops(fi
) {
808 if ((err
= fib_check_nh(cfg
, fi
, nexthop_nh
)) != 0)
810 } endfor_nexthops(fi
)
/* A prefsrc must be a local address (or the destination of a
 * local route being added). */
813 if (fi
->fib_prefsrc
) {
814 if (cfg
->fc_type
!= RTN_LOCAL
|| !cfg
->fc_dst
||
815 fi
->fib_prefsrc
!= cfg
->fc_dst
)
816 if (inet_addr_type(net
, fi
->fib_prefsrc
) != RTN_LOCAL
)
/* Duplicate of an existing fib_info: reuse it (discard path of
 * the new fi is not visible in this extract). */
821 if ((ofi
= fib_find_info(fi
)) != NULL
) {
829 atomic_inc(&fi
->fib_clntref
);
/* Publish the new fib_info in all hash tables atomically. */
830 spin_lock_bh(&fib_info_lock
);
831 hlist_add_head(&fi
->fib_hash
,
832 &fib_info_hash
[fib_info_hashfn(fi
)]);
833 if (fi
->fib_prefsrc
) {
834 struct hlist_head
*head
;
836 head
= &fib_info_laddrhash
[fib_laddr_hashfn(fi
->fib_prefsrc
)];
837 hlist_add_head(&fi
->fib_lhash
, head
);
839 change_nexthops(fi
) {
840 struct hlist_head
*head
;
/* Device-less nexthops are not tracked in the devhash. */
843 if (!nexthop_nh
->nh_dev
)
845 hash
= fib_devindex_hashfn(nexthop_nh
->nh_dev
->ifindex
);
846 head
= &fib_info_devhash
[hash
];
847 hlist_add_head(&nexthop_nh
->nh_hash
, head
);
848 } endfor_nexthops(fi
)
849 spin_unlock_bh(&fib_info_lock
);
864 /* Note! fib_semantic_match intentionally uses RCU list functions. */
/* Walk the alias list for an entry matching the flow's tos and
 * scope, skip dead fib_infos/nexthops, and fill *res with the
 * selected route and nexthop. Returns the per-type error code (0
 * on success); several branch bodies and returns are missing from
 * this extract. */
865 int fib_semantic_match(struct list_head
*head
, const struct flowi
*flp
,
866 struct fib_result
*res
, int prefixlen
)
868 struct fib_alias
*fa
;
871 list_for_each_entry_rcu(fa
, head
, fa_list
) {
/* Alias tos must match the flow's tos exactly (when set —
 * wildcard handling is not visible in this extract). */
875 fa
->fa_tos
!= flp
->fl4_tos
)
878 if (fa
->fa_scope
< flp
->fl4_scope
)
881 fa
->fa_state
|= FA_S_ACCESSED
;
/* Per-type error (e.g. -EHOSTUNREACH for RTN_UNREACHABLE). */
883 err
= fib_props
[fa
->fa_type
].error
;
885 struct fib_info
*fi
= fa
->fa_info
;
887 if (fi
->fib_flags
& RTNH_F_DEAD
)
890 switch (fa
->fa_type
) {
/* Nexthop selection: skip dead nexthops, honour a requested
 * output interface. */
897 if (nh
->nh_flags
&RTNH_F_DEAD
)
899 if (!flp
->oif
|| flp
->oif
== nh
->nh_oif
)
902 #ifdef CONFIG_IP_ROUTE_MULTIPATH
903 if (nhsel
< fi
->fib_nhs
) {
916 printk(KERN_WARNING
"fib_semantic_match bad type %#x\n",
/* Fill the result with the chosen alias/nexthop and take a
 * client reference on the fib_info. */
926 res
->prefixlen
= prefixlen
;
927 res
->nh_sel
= nh_sel
;
928 res
->type
= fa
->fa_type
;
929 res
->scope
= fa
->fa_scope
;
930 res
->fi
= fa
->fa_info
;
931 atomic_inc(&res
->fi
->fib_clntref
);
935 /* Find appropriate source address to this destination */
/* Delegates to inet_select_addr() on the result's device, scoped
 * by the route's gateway and scope. */
937 __be32
__fib_res_prefsrc(struct fib_result
*res
)
939 return inet_select_addr(FIB_RES_DEV(*res
), FIB_RES_GW(*res
), res
->scope
);
/* Fill skb with a complete RTM route message for this route:
 * rtmsg header, table/dst/priority/prefsrc/metrics attributes,
 * then either inline single-nexthop attributes or a nested
 * RTA_MULTIPATH list. Returns the nlmsg_end() result; -EMSGSIZE
 * paths cancel the message. Several guard lines are missing from
 * this extract. */
942 int fib_dump_info(struct sk_buff
*skb
, u32 pid
, u32 seq
, int event
,
943 u32 tb_id
, u8 type
, u8 scope
, __be32 dst
, int dst_len
, u8 tos
,
944 struct fib_info
*fi
, unsigned int flags
)
946 struct nlmsghdr
*nlh
;
949 nlh
= nlmsg_put(skb
, pid
, seq
, event
, sizeof(*rtm
), flags
);
953 rtm
= nlmsg_data(nlh
);
954 rtm
->rtm_family
= AF_INET
;
955 rtm
->rtm_dst_len
= dst_len
;
956 rtm
->rtm_src_len
= 0;
/* rtm_table is 8 bits; larger table ids go in RTA_TABLE and the
 * header carries RT_TABLE_COMPAT. */
959 rtm
->rtm_table
= tb_id
;
961 rtm
->rtm_table
= RT_TABLE_COMPAT
;
962 NLA_PUT_U32(skb
, RTA_TABLE
, tb_id
);
963 rtm
->rtm_type
= type
;
964 rtm
->rtm_flags
= fi
->fib_flags
;
965 rtm
->rtm_scope
= scope
;
966 rtm
->rtm_protocol
= fi
->fib_protocol
;
/* Optional attributes are emitted only when meaningful. */
968 if (rtm
->rtm_dst_len
)
969 NLA_PUT_BE32(skb
, RTA_DST
, dst
);
971 if (fi
->fib_priority
)
972 NLA_PUT_U32(skb
, RTA_PRIORITY
, fi
->fib_priority
);
974 if (rtnetlink_put_metrics(skb
, fi
->fib_metrics
) < 0)
975 goto nla_put_failure
;
978 NLA_PUT_BE32(skb
, RTA_PREFSRC
, fi
->fib_prefsrc
);
/* Single nexthop: gateway/oif/flow as top-level attributes. */
980 if (fi
->fib_nhs
== 1) {
981 if (fi
->fib_nh
->nh_gw
)
982 NLA_PUT_BE32(skb
, RTA_GATEWAY
, fi
->fib_nh
->nh_gw
);
984 if (fi
->fib_nh
->nh_oif
)
985 NLA_PUT_U32(skb
, RTA_OIF
, fi
->fib_nh
->nh_oif
);
986 #ifdef CONFIG_NET_CLS_ROUTE
987 if (fi
->fib_nh
[0].nh_tclassid
)
988 NLA_PUT_U32(skb
, RTA_FLOW
, fi
->fib_nh
[0].nh_tclassid
);
991 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Multiple nexthops: nest one rtnexthop per nexthop inside
 * RTA_MULTIPATH. */
992 if (fi
->fib_nhs
> 1) {
993 struct rtnexthop
*rtnh
;
996 mp
= nla_nest_start(skb
, RTA_MULTIPATH
);
998 goto nla_put_failure
;
1001 rtnh
= nla_reserve_nohdr(skb
, sizeof(*rtnh
));
1003 goto nla_put_failure
;
1005 rtnh
->rtnh_flags
= nh
->nh_flags
& 0xFF;
/* Wire format stores weight-1 (inverse of fib_get_nhs()). */
1006 rtnh
->rtnh_hops
= nh
->nh_weight
- 1;
1007 rtnh
->rtnh_ifindex
= nh
->nh_oif
;
1010 NLA_PUT_BE32(skb
, RTA_GATEWAY
, nh
->nh_gw
);
1011 #ifdef CONFIG_NET_CLS_ROUTE
1012 if (nh
->nh_tclassid
)
1013 NLA_PUT_U32(skb
, RTA_FLOW
, nh
->nh_tclassid
);
1015 /* length of rtnetlink header + attributes */
1016 rtnh
->rtnh_len
= nlmsg_get_pos(skb
) - (void *) rtnh
;
1017 } endfor_nexthops(fi
);
1019 nla_nest_end(skb
, mp
);
1022 return nlmsg_end(skb
, nlh
);
/* NLA_PUT_* macros jump here when the skb runs out of room. */
1025 nlmsg_cancel(skb
, nlh
);
1031 - local address disappeared -> we must delete all the entries
1033 - device went down -> we must shutdown all nexthops going via it.
/* A local address vanished: mark RTNH_F_DEAD every fib_info in
 * this netns whose preferred source is that address. Presumably
 * returns the number of entries killed — the counter and return
 * are not visible in this extract. */
1035 int fib_sync_down_addr(struct net
*net
, __be32 local
)
1038 unsigned int hash
= fib_laddr_hashfn(local
);
1039 struct hlist_head
*head
= &fib_info_laddrhash
[hash
];
1040 struct hlist_node
*node
;
1041 struct fib_info
*fi
;
/* Nothing to do before the table exists or for a zero address. */
1043 if (fib_info_laddrhash
== NULL
|| local
== 0)
1046 hlist_for_each_entry(fi
, node
, head
, fib_lhash
) {
1047 if (!net_eq(fi
->fib_net
, net
))
1049 if (fi
->fib_prefsrc
== local
) {
1050 fi
->fib_flags
|= RTNH_F_DEAD
;
/* A device went down: mark dead every nexthop routed via it (scope
 * handling depends on 'force'); when all of a fib_info's nexthops
 * become dead, mark the whole fib_info dead. Presumably returns
 * the number of routes killed — counters/returns are not visible
 * in this extract. */
1057 int fib_sync_down_dev(struct net_device
*dev
, int force
)
1060 int scope
= RT_SCOPE_NOWHERE
;
1061 struct fib_info
*prev_fi
= NULL
;
1062 unsigned int hash
= fib_devindex_hashfn(dev
->ifindex
);
1063 struct hlist_head
*head
= &fib_info_devhash
[hash
];
1064 struct hlist_node
*node
;
1070 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
1071 struct fib_info
*fi
= nh
->nh_parent
;
1074 BUG_ON(!fi
->fib_nhs
);
/* prev_fi skips re-processing a fib_info whose nexthops share
 * the same bucket. */
1075 if (nh
->nh_dev
!= dev
|| fi
== prev_fi
)
1079 change_nexthops(fi
) {
1080 if (nexthop_nh
->nh_flags
&RTNH_F_DEAD
)
1082 else if (nexthop_nh
->nh_dev
== dev
&&
1083 nexthop_nh
->nh_scope
!= scope
) {
1084 nexthop_nh
->nh_flags
|= RTNH_F_DEAD
;
1085 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Keep the multipath power accounting consistent with the
 * nexthop going dead. */
1086 spin_lock_bh(&fib_multipath_lock
);
1087 fi
->fib_power
-= nexthop_nh
->nh_power
;
1088 nexthop_nh
->nh_power
= 0;
1089 spin_unlock_bh(&fib_multipath_lock
);
1093 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* force > 1 (device unregistering) also detaches nexthops the
 * scope test above kept alive. */
1094 if (force
> 1 && nexthop_nh
->nh_dev
== dev
) {
1099 } endfor_nexthops(fi
)
1100 if (dead
== fi
->fib_nhs
) {
1101 fi
->fib_flags
|= RTNH_F_DEAD
;
1109 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1112 Dead device goes up. We wake up dead nexthops.
1113 It takes sense only on multipath routes.
/* Clear RTNH_F_DEAD on nexthops that point at the newly-up device
 * (and on their fib_infos); compiled only for multipath kernels.
 * Presumably returns a count of revived routes — not visible in
 * this extract. */
1116 int fib_sync_up(struct net_device
*dev
)
1118 struct fib_info
*prev_fi
;
1120 struct hlist_head
*head
;
1121 struct hlist_node
*node
;
1125 if (!(dev
->flags
&IFF_UP
))
1129 hash
= fib_devindex_hashfn(dev
->ifindex
);
1130 head
= &fib_info_devhash
[hash
];
1133 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
1134 struct fib_info
*fi
= nh
->nh_parent
;
1137 BUG_ON(!fi
->fib_nhs
);
1138 if (nh
->nh_dev
!= dev
|| fi
== prev_fi
)
1143 change_nexthops(fi
) {
/* Already-alive nexthops are counted, not touched. */
1144 if (!(nexthop_nh
->nh_flags
&RTNH_F_DEAD
)) {
1148 if (nexthop_nh
->nh_dev
== NULL
||
1149 !(nexthop_nh
->nh_dev
->flags
&IFF_UP
))
1151 if (nexthop_nh
->nh_dev
!= dev
||
1152 !__in_dev_get_rtnl(dev
))
/* Revive: reset power so fib_select_multipath() refills it. */
1155 spin_lock_bh(&fib_multipath_lock
);
1156 nexthop_nh
->nh_power
= 0;
1157 nexthop_nh
->nh_flags
&= ~RTNH_F_DEAD
;
1158 spin_unlock_bh(&fib_multipath_lock
);
1159 } endfor_nexthops(fi
)
1162 fi
->fib_flags
&= ~RTNH_F_DEAD
;
1171 The algorithm is suboptimal, but it provides really
1172 fair weighted route distribution.
/* Weighted round-robin nexthop selection: when the fib_info's
 * remaining "power" is exhausted, refill each live nexthop's
 * nh_power from its nh_weight; then walk the nexthops subtracting
 * power until a jiffies-derived token is consumed, and record that
 * nexthop in res->nh_sel. */
1175 void fib_select_multipath(const struct flowi
*flp
, struct fib_result
*res
)
1177 struct fib_info
*fi
= res
->fi
;
1180 spin_lock_bh(&fib_multipath_lock
);
/* Refill phase: recompute total power from live nexthops. */
1181 if (fi
->fib_power
<= 0) {
1183 change_nexthops(fi
) {
1184 if (!(nexthop_nh
->nh_flags
&RTNH_F_DEAD
)) {
1185 power
+= nexthop_nh
->nh_weight
;
1186 nexthop_nh
->nh_power
= nexthop_nh
->nh_weight
;
1188 } endfor_nexthops(fi
);
1189 fi
->fib_power
= power
;
1191 spin_unlock_bh(&fib_multipath_lock
);
1192 /* Race condition: route has just become dead. */
1199 /* w should be random number [0..fi->fib_power-1],
1200 it is pretty bad approximation.
/* jiffies modulo total power stands in for a random draw. */
1203 w
= jiffies
% fi
->fib_power
;
/* Selection phase: charge one unit to the nexthop the token
 * lands on. */
1205 change_nexthops(fi
) {
1206 if (!(nexthop_nh
->nh_flags
&RTNH_F_DEAD
) &&
1207 nexthop_nh
->nh_power
) {
1208 if ((w
-= nexthop_nh
->nh_power
) <= 0) {
1209 nexthop_nh
->nh_power
--;
1211 res
->nh_sel
= nhsel
;
1212 spin_unlock_bh(&fib_multipath_lock
);
1216 } endfor_nexthops(fi
);
1218 /* Race condition: route has just become dead. */
1220 spin_unlock_bh(&fib_multipath_lock
);