/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/init.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>

#include "fib_lookup.h"
static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh *nh; \
	for (nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh *nh; \
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope that gcc will optimize this to get rid of the dummy loop */

#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh *nh = (fi)->fib_nh; \
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh); \
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
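
/*
 * Illustrative sketch (not part of the original file): how the macro
 * pair above is meant to be used.  for_nexthops() supplies "nhsel" and
 * "nh" and opens an unbalanced '{' that endfor_nexthops() closes, so
 * the two must always appear together.  "count_alive_nexthops" is a
 * hypothetical helper shown only for demonstration.
 */
static inline int count_alive_nexthops(const struct fib_info *fi)
{
	int alive = 0;

	for_nexthops(fi) {
		if (!(nh->nh_flags & RTNH_F_DEAD))
			alive++;
	} endfor_nexthops(fi);

	return alive;
}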
static const struct
{
	int	error;
	u8	scope;
} fib_props[RTN_MAX + 1] = {
	{
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_UNSPEC */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNICAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},	/* RTN_LOCAL */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_BROADCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_ANYCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_MULTICAST */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_BLACKHOLE */
	{
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNREACHABLE */
	{
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_PROHIBIT */
	{
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_THROW */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_NAT */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_XRESOLVE */
};
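
/*
 * Illustrative sketch (not part of the original file): how the table
 * above is consulted.  fib_semantic_match() below does exactly this to
 * turn a matching pseudo-route (blackhole, unreachable, prohibit, ...)
 * into its errno; "fib_type_to_errno" is a hypothetical name.
 */
static inline int fib_type_to_errno(u8 fa_type)
{
	/* e.g. fib_props[RTN_UNREACHABLE].error is -EHOSTUNREACH */
	return fa_type <= RTN_MAX ? fib_props[fa_type].error : -EINVAL;
}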
/* Release a nexthop info record */

void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
		return;
	}
	change_nexthops(fi) {
		if (nh->nh_dev)
			dev_put(nh->nh_dev);
		nh->nh_dev = NULL;
	} endfor_nexthops(fi);
	fib_info_cnt--;
	kfree(fi);
}
void fib_release_info(struct fib_info *fi)
{
	spin_lock_bh(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			if (!nh->nh_dev)
				continue;
			hlist_del(&nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	spin_unlock_bh(&fib_info_lock);
}
static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->nh_oif != onh->nh_oif ||
		    nh->nh_gw != onh->nh_gw ||
		    nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
			return -1;
		onh++;
	} endfor_nexthops(fi);
	return 0;
}
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
	unsigned int mask = DEVINDEX_HASHSIZE - 1;

	return (val ^
		(val >> DEVINDEX_HASHBITS) ^
		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}
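
/*
 * Illustrative sketch (not part of the original file): the fold above
 * reduces a 32-bit ifindex to DEVINDEX_HASHBITS bits, and callers such
 * as ip_fib_check_default() and fib_sync_down() below pick their chain
 * exactly like this; "fib_devhash_bucket" is a hypothetical name.
 */
static inline struct hlist_head *fib_devhash_bucket(const struct net_device *dev)
{
	return &fib_info_devhash[fib_devindex_hashfn(dev->ifindex)];
}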
static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
{
	unsigned int mask = (fib_hash_size - 1);
	unsigned int val = fi->fib_nhs;

	val ^= fi->fib_protocol;
	val ^= (__force u32)fi->fib_prefsrc;
	val ^= fi->fib_priority;
	for_nexthops(fi) {
		val ^= fib_devindex_hashfn(nh->nh_oif);
	} endfor_nexthops(fi)

	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, node, head, fib_hash) {
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(fi->fib_metrics)) == 0 &&
		    ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}
/* Check that the gateway is already configured.
 * Used only by the redirect accept routine.
 */
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	unsigned int hash;

	spin_lock(&fib_info_lock);

	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	hlist_for_each_entry(nh, node, head, nh_hash) {
		if (nh->nh_dev == dev &&
		    nh->nh_gw == gw &&
		    !(nh->nh_flags & RTNH_F_DEAD)) {
			spin_unlock(&fib_info_lock);
			return 0;
		}
	}

	spin_unlock(&fib_info_lock);

	return -1;
}
static inline size_t fib_nlmsg_size(struct fib_info *fi)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
			 + nla_total_size(4) /* RTA_TABLE */
			 + nla_total_size(4) /* RTA_DST */
			 + nla_total_size(4) /* RTA_PRIORITY */
			 + nla_total_size(4); /* RTA_PREFSRC */

	/* space for nested metrics */
	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));

	if (fi->fib_nhs) {
		/* Also handles the special case fib_nhs == 1 */

		/* each nexthop is packed in an attribute */
		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));

		/* may contain flow and gateway attribute */
		nhsize += 2 * nla_total_size(4);

		/* all nexthops are packed in a nested attribute */
		payload += nla_total_size(fi->fib_nhs * nhsize);
	}

	return payload;
}
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
	       int dst_len, u32 tb_id, struct nl_info *info,
	       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
			    fa->fa_type, fa->fa_scope, key, dst_len,
			    fa->fa_tos, fa->fa_info, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
			  info->nlh, GFP_KERNEL);
errout:
	if (err < 0)
		rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}
/* Return the first fib alias matching TOS with
 * priority less than or equal to PRIO.
 */
struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
{
	if (fah) {
		struct fib_alias *fa;
		list_for_each_entry(fa, fah, fa_list) {
			if (fa->fa_tos > tos)
				continue;
			if (fa->fa_info->fib_priority >= prio ||
			    fa->fa_tos < tos)
				return fa;
		}
	}
	return NULL;
}
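
/*
 * Illustrative note (not part of the original file): the alias list is
 * kept sorted by descending fa_tos, then ascending fib_priority, so an
 * insert of a new alias (tos, prio) in the table code typically reads:
 *
 *	fa = fib_find_alias(alias_list, tos, prio);
 *	... insert the new alias before "fa" (or at the tail if NULL) ...
 *
 * "alias_list" stands for the per-node alias list maintained by the
 * fib_hash/fib_trie table code that calls this helper.
 */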
int fib_detect_death(struct fib_info *fi, int order,
		     struct fib_info **last_resort, int *last_idx, int dflt)
{
	struct neighbour *n;
	int state = NUD_NONE;

	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
	if (n) {
		state = n->nud_state;
		neigh_release(n);
	}
	if (state == NUD_REACHABLE)
		return 0;
	if ((state & NUD_VALID) && order != dflt)
		return 0;
	if ((state & NUD_VALID) ||
	    (*last_idx < 0 && order > dflt)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs = 0;

	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
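
/*
 * Illustrative note (not part of the original file): RTA_MULTIPATH is
 * a blob of struct rtnexthop headers, each immediately followed by its
 * own nested attributes (RTA_GATEWAY, and RTA_FLOW when route classids
 * are enabled):
 *
 *	+--------------+----------------+--------------+------------
 *	| rtnexthop #0 | its attributes | rtnexthop #1 | ...
 *	|<-------- rtnh_len ----------->|
 *
 * rtnh_ok() checks that a whole header plus its rtnh_len still fits in
 * the remaining bytes; rtnh_next() advances past both.
 */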
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg)
{
	change_nexthops(fi) {
		int attrlen;

		if (!rtnh_ok(rtnh, remaining))
			return -EINVAL;

		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
		nh->nh_oif = rtnh->rtnh_ifindex;
		nh->nh_weight = rtnh->rtnh_hops + 1;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			nh->nh_gw = nla ? nla_get_be32(nla) : 0;
#ifdef CONFIG_NET_CLS_ROUTE
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
#endif
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);

	return 0;
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *rtnh;
	int remaining;
#endif

	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
		return 1;

	if (cfg->fc_oif || cfg->fc_gw) {
		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
		    (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
			return 0;
		return 1;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp == NULL)
		return 0;

	rtnh = cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	for_nexthops(fi) {
		int attrlen;

		if (!rtnh_ok(rtnh, remaining))
			return -EINVAL;

		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
			return 1;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla && nla_get_be32(nla) != nh->nh_gw)
				return 1;
#ifdef CONFIG_NET_CLS_ROUTE
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
				return 1;
#endif
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);
#endif

	return 0;
}
/*
 * Picture
 * -------
 *
 * Nexthop semantics are messy for historical reasons.  We have to take
 * into account that:
 *
 * a) A gateway can actually be a local interface address, in which
 *    case the "gatewayed" route is direct.
 * b) A gateway must be an on-link address, but it may be described not
 *    by an ifaddr, but by a direct route instead.
 * c) If both a gateway and an interface are specified, they must not
 *    contradict each other.
 * d) With tunnel routes, the gateway may not be on-link.
 *
 * Attempting to reconcile all of these (alas, self-contradictory)
 * conditions results in ugly, hairy code with obscure logic.
 *
 * I chose to generalize it instead, so that the amount of code barely
 * grows but becomes much more general.  Every prefix is assigned a
 * "scope" value: "host" is a local address, "link" is a direct route,
 * [ ... "site" ... "interior" ... ] and "universe" is a true gateway
 * route with global meaning.
 *
 * Every prefix refers to a set of "nexthops" (gw, oif), where each gw
 * must have narrower scope.  This recursion stops when the gw has
 * LOCAL scope or when the nexthop is declared ONLINK, which forces the
 * gw to be on-link.
 *
 * The code is still hairy, but now it is apparently logically
 * consistent and very flexible.  E.g. as a by-product it allows
 * independent exterior and interior routing protocols to coexist in
 * peace.
 *
 * Normally it looks like this:
 *
 * {universe prefix}  -> (gw, oif) [scope link]
 *                         |
 *                         |-> {link prefix} -> (gw, oif) [scope local]
 *                                                |
 *                                                |-> {local prefix} (terminal node)
 */
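
/*
 * Illustrative sketch (not part of the original file): RT_SCOPE_*
 * values grow as the scope narrows (RT_SCOPE_UNIVERSE = 0 ...
 * RT_SCOPE_SITE = 200 ... RT_SCOPE_LINK = 253, RT_SCOPE_HOST = 254,
 * RT_SCOPE_NOWHERE = 255), so "gw must have narrower scope" is a plain
 * numeric comparison.  This is why fib_check_nh() below looks the
 * gateway up with fl4_scope = cfg->fc_scope + 1.
 */
static inline int fib_scope_is_narrower(unsigned char gw_scope,
					unsigned char route_scope)
{
	return gw_scope > route_scope;	/* hypothetical helper */
}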
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
			struct fib_nh *nh)
{
	int err;
	struct net *net;

	net = cfg->fc_nlinfo.nl_net;
	if (nh->nh_gw) {
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		if (nh->nh_flags & RTNH_F_PERVASIVE)
			return 0;
#endif

		if (nh->nh_flags & RTNH_F_ONLINK) {
			struct net_device *dev;

			if (cfg->fc_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags & IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						.daddr = nh->nh_gw,
						.scope = cfg->fc_scope + 1,
					},
				},
				.oif = nh->nh_oif,
			};

			/* Not strictly necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(net, &fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		struct in_device *in_dev;

		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(net, nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags & IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}
static inline unsigned int fib_laddr_hashfn(__be32 val)
{
	unsigned int mask = (fib_hash_size - 1);

	return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
}
static struct hlist_head *fib_hash_alloc(int bytes)
{
	if (bytes <= PAGE_SIZE)
		return kzalloc(bytes, GFP_KERNEL);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(bytes));
}
static void fib_hash_free(struct hlist_head *hash, int bytes)
{
	if (!hash)
		return;

	if (bytes <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long) hash, get_order(bytes));
}
static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_hash_size;
	unsigned int i, bytes;

	spin_lock_bh(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	fib_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	spin_unlock_bh(&fib_info_lock);

	bytes = old_size * sizeof(struct hlist_head *);
	fib_hash_free(old_info_hash, bytes);
	fib_hash_free(old_laddrhash, bytes);
}
struct fib_info *fib_create_info(struct fib_config *cfg)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
	int nhs = 1;

	/* Fast check to catch the weirdest cases */
	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp) {
		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	if (fib_info_cnt >= fib_hash_size) {
		unsigned int new_size = fib_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 1;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_hash_alloc(bytes);
		new_laddrhash = fib_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_hash_free(new_info_hash, bytes);
			fib_hash_free(new_laddrhash, bytes);
		} else
			fib_hash_move(new_info_hash, new_laddrhash, new_size);

		if (!fib_hash_size)
			goto failure;
	}

	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;

	fi->fib_protocol = cfg->fc_protocol;
	fi->fib_flags = cfg->fc_flags;
	fi->fib_priority = cfg->fc_priority;
	fi->fib_prefsrc = cfg->fc_prefsrc;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nh->nh_parent = fi;
	} endfor_nexthops(fi)

	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
		if (err != 0)
			goto failure;
		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
			goto err_inval;
		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
			goto err_inval;
#endif
#else
		goto err_inval;
#endif
	} else {
		struct fib_nh *nh = fi->fib_nh;

		nh->nh_oif = cfg->fc_oif;
		nh->nh_gw = cfg->fc_gw;
		nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_NET_CLS_ROUTE
		nh->nh_tclassid = cfg->fc_flow;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

	if (fib_props[cfg->fc_type].error) {
		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
			goto err_inval;
		goto link_it;
	}

	if (cfg->fc_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (cfg->fc_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
					      fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		change_nexthops(fi) {
			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	if (fi->fib_prefsrc) {
		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
		    fi->fib_prefsrc != cfg->fc_dst)
			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
					   fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	spin_lock_bh(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nh->nh_hash, head);
	} endfor_nexthops(fi)
	spin_unlock_bh(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}
/* Note! fib_semantic_match intentionally uses RCU list functions. */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, __be32 zone, __be32 mask,
		       int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry_rcu(fa, head, fa_list) {
		int err;

		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				for_nexthops(fi) {
					if (nh->nh_flags & RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1)
					goto out_fill_res;
#endif
				endfor_nexthops(fi);
				continue;

			default:
				printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
				       fa->fa_type);
				return -EINVAL;
			}
		} else
			return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}
/* Find an appropriate source address for this destination */

__be32 __fib_res_prefsrc(struct fib_result *res)
{
	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
		  struct fib_info *fi, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	rtm->rtm_protocol = fi->fib_protocol;

	if (rtm->rtm_dst_len)
		NLA_PUT_BE32(skb, RTA_DST, dst);

	if (fi->fib_priority)
		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);

	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto nla_put_failure;

	if (fi->fib_prefsrc)
		NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);

	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);

		if (fi->fib_nh->nh_oif)
			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
#ifdef CONFIG_NET_CLS_ROUTE
		if (fi->fib_nh[0].nh_tclassid)
			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
#endif
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fi->fib_nhs > 1) {
		struct rtnexthop *rtnh;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (mp == NULL)
			goto nla_put_failure;

		for_nexthops(fi) {
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (rtnh == NULL)
				goto nla_put_failure;

			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
			rtnh->rtnh_hops = nh->nh_weight - 1;
			rtnh->rtnh_ifindex = nh->nh_oif;

			if (nh->nh_gw)
				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
#ifdef CONFIG_NET_CLS_ROUTE
			if (nh->nh_tclassid)
				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
#endif
			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
		} endfor_nexthops(fi);

		nla_nest_end(skb, mp);
	}
#endif
	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
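
/*
 * Illustrative note (not part of the original file): the NLA_PUT_*()
 * macros used above come from include/net/netlink.h of this era and
 * expand to roughly
 *
 *	do {
 *		if (nla_put_u32(skb, attrtype, value) < 0)
 *			goto nla_put_failure;
 *	} while (0)
 *
 * which is why fib_dump_info() must provide the nla_put_failure label
 * that cancels the partially built message.
 */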
/*
 * Update FIB if:
 * - local address disappeared -> we must delete all the entries
 *   referring to it.
 * - device went down -> we must shutdown all nexthops going via it.
 */
int fib_sync_down(__be32 local, struct net_device *dev, int force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;

	if (force)
		scope = -1;

	if (local && fib_info_laddrhash) {
		unsigned int hash = fib_laddr_hashfn(local);
		struct hlist_head *head = &fib_info_laddrhash[hash];
		struct hlist_node *node;
		struct fib_info *fi;

		hlist_for_each_entry(fi, node, head, fib_lhash) {
			if (fi->fib_prefsrc == local) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	if (dev) {
		struct fib_info *prev_fi = NULL;
		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
		struct hlist_head *head = &fib_info_devhash[hash];
		struct hlist_node *node;
		struct fib_nh *nh;

		hlist_for_each_entry(nh, node, head, nh_hash) {
			struct fib_info *fi = nh->nh_parent;
			int dead;

			BUG_ON(!fi->fib_nhs);
			if (nh->nh_dev != dev || fi == prev_fi)
				continue;
			prev_fi = fi;
			dead = 0;
			change_nexthops(fi) {
				if (nh->nh_flags & RTNH_F_DEAD)
					dead++;
				else if (nh->nh_dev == dev &&
					 nh->nh_scope != scope) {
					nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
					spin_lock_bh(&fib_multipath_lock);
					fi->fib_power -= nh->nh_power;
					nh->nh_power = 0;
					spin_unlock_bh(&fib_multipath_lock);
#endif
					dead++;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (force > 1 && nh->nh_dev == dev) {
					dead = fi->fib_nhs;
					break;
				}
#endif
			} endfor_nexthops(fi)
			if (dead == fi->fib_nhs) {
				fi->fib_flags |= RTNH_F_DEAD;
				ret++;
			}
		}
	}

	return ret;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * Dead device goes up. We wake up dead nexthops.
 * This makes sense only on multipath routes.
 */
int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags & IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags & RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			if (nh->nh_dev == NULL || !(nh->nh_dev->flags & IFF_UP))
				continue;
			if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			spin_lock_bh(&fib_multipath_lock);
			nh->nh_power = 0;
			nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}
/*
 * The algorithm is suboptimal, but it provides really
 * fair weighted route distribution.
 */
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		int power = 0;
		change_nexthops(fi) {
			if (!(nh->nh_flags & RTNH_F_DEAD)) {
				power += nh->nh_weight;
				nh->nh_power = nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}

	/* w should be a random number in [0..fi->fib_power-1];
	 * taking jiffies is a pretty poor approximation.
	 */

	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if (!(nh->nh_flags & RTNH_F_DEAD) && nh->nh_power) {
			if ((w -= nh->nh_power) <= 0) {
				nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */
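
/*
 * Illustrative, self-contained sketch (not part of the original file)
 * of the power-draining scheme above: every live nexthop starts a
 * cycle with nh_power = nh_weight, each pick drains one unit from the
 * chosen nexthop and from fib_power, and the cycle restarts once
 * fib_power hits zero.  Over one full cycle, nexthops with weights
 * {2, 1} are therefore picked exactly 2 and 1 times respectively.
 * "demo_weighted_pick" is a hypothetical helper, not kernel API.
 */
static inline int demo_weighted_pick(int n, int *power, const int *weight,
				     int *total, unsigned int rnd)
{
	int i, w;

	if (*total <= 0) {		/* refill a finished cycle */
		*total = 0;
		for (i = 0; i < n; i++) {
			power[i] = weight[i];
			*total += weight[i];
		}
	}

	w = rnd % *total;		/* ideally uniform in [0, total) */
	for (i = 0; i < n; i++) {
		if (power[i] && (w -= power[i]) <= 0) {
			power[i]--;
			(*total)--;
			return i;	/* index of the chosen nexthop */
		}
	}
	return 0;			/* all drained: next call refills */
}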