2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
45 #include <net/netlink.h>
46 #include <net/nexthop.h>
48 #include "fib_lookup.h"
50 #define FSprintk(a...)
52 static DEFINE_SPINLOCK(fib_info_lock
);
53 static struct hlist_head
*fib_info_hash
;
54 static struct hlist_head
*fib_info_laddrhash
;
55 static unsigned int fib_hash_size
;
56 static unsigned int fib_info_cnt
;
58 #define DEVINDEX_HASHBITS 8
59 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60 static struct hlist_head fib_info_devhash
[DEVINDEX_HASHSIZE
];
62 #ifdef CONFIG_IP_ROUTE_MULTIPATH
64 static DEFINE_SPINLOCK(fib_multipath_lock
);
66 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
67 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
69 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
70 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
72 #else /* CONFIG_IP_ROUTE_MULTIPATH */
74 /* Hope, that gcc will optimize it to get rid of dummy loop */
76 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
77 for (nhsel=0; nhsel < 1; nhsel++)
79 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
80 for (nhsel=0; nhsel < 1; nhsel++)
82 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
84 #define endfor_nexthops(fi) }
91 } fib_props
[RTN_MAX
+ 1] = {
94 .scope
= RT_SCOPE_NOWHERE
,
98 .scope
= RT_SCOPE_UNIVERSE
,
102 .scope
= RT_SCOPE_HOST
,
106 .scope
= RT_SCOPE_LINK
,
107 }, /* RTN_BROADCAST */
110 .scope
= RT_SCOPE_LINK
,
114 .scope
= RT_SCOPE_UNIVERSE
,
115 }, /* RTN_MULTICAST */
118 .scope
= RT_SCOPE_UNIVERSE
,
119 }, /* RTN_BLACKHOLE */
121 .error
= -EHOSTUNREACH
,
122 .scope
= RT_SCOPE_UNIVERSE
,
123 }, /* RTN_UNREACHABLE */
126 .scope
= RT_SCOPE_UNIVERSE
,
127 }, /* RTN_PROHIBIT */
130 .scope
= RT_SCOPE_UNIVERSE
,
134 .scope
= RT_SCOPE_NOWHERE
,
138 .scope
= RT_SCOPE_NOWHERE
,
139 }, /* RTN_XRESOLVE */
143 /* Release a nexthop info record */
145 void free_fib_info(struct fib_info
*fi
)
147 if (fi
->fib_dead
== 0) {
148 printk("Freeing alive fib_info %p\n", fi
);
151 change_nexthops(fi
) {
155 } endfor_nexthops(fi
);
160 void fib_release_info(struct fib_info
*fi
)
162 spin_lock_bh(&fib_info_lock
);
163 if (fi
&& --fi
->fib_treeref
== 0) {
164 hlist_del(&fi
->fib_hash
);
166 hlist_del(&fi
->fib_lhash
);
167 change_nexthops(fi
) {
170 hlist_del(&nh
->nh_hash
);
171 } endfor_nexthops(fi
)
175 spin_unlock_bh(&fib_info_lock
);
178 static __inline__
int nh_comp(const struct fib_info
*fi
, const struct fib_info
*ofi
)
180 const struct fib_nh
*onh
= ofi
->fib_nh
;
183 if (nh
->nh_oif
!= onh
->nh_oif
||
184 nh
->nh_gw
!= onh
->nh_gw
||
185 nh
->nh_scope
!= onh
->nh_scope
||
186 #ifdef CONFIG_IP_ROUTE_MULTIPATH
187 nh
->nh_weight
!= onh
->nh_weight
||
189 #ifdef CONFIG_NET_CLS_ROUTE
190 nh
->nh_tclassid
!= onh
->nh_tclassid
||
192 ((nh
->nh_flags
^onh
->nh_flags
)&~RTNH_F_DEAD
))
195 } endfor_nexthops(fi
);
199 static inline unsigned int fib_info_hashfn(const struct fib_info
*fi
)
201 unsigned int mask
= (fib_hash_size
- 1);
202 unsigned int val
= fi
->fib_nhs
;
204 val
^= fi
->fib_protocol
;
205 val
^= (__force u32
)fi
->fib_prefsrc
;
206 val
^= fi
->fib_priority
;
208 return (val
^ (val
>> 7) ^ (val
>> 12)) & mask
;
211 static struct fib_info
*fib_find_info(const struct fib_info
*nfi
)
213 struct hlist_head
*head
;
214 struct hlist_node
*node
;
218 hash
= fib_info_hashfn(nfi
);
219 head
= &fib_info_hash
[hash
];
221 hlist_for_each_entry(fi
, node
, head
, fib_hash
) {
222 if (fi
->fib_nhs
!= nfi
->fib_nhs
)
224 if (nfi
->fib_protocol
== fi
->fib_protocol
&&
225 nfi
->fib_prefsrc
== fi
->fib_prefsrc
&&
226 nfi
->fib_priority
== fi
->fib_priority
&&
227 memcmp(nfi
->fib_metrics
, fi
->fib_metrics
,
228 sizeof(fi
->fib_metrics
)) == 0 &&
229 ((nfi
->fib_flags
^fi
->fib_flags
)&~RTNH_F_DEAD
) == 0 &&
230 (nfi
->fib_nhs
== 0 || nh_comp(fi
, nfi
) == 0))
237 static inline unsigned int fib_devindex_hashfn(unsigned int val
)
239 unsigned int mask
= DEVINDEX_HASHSIZE
- 1;
242 (val
>> DEVINDEX_HASHBITS
) ^
243 (val
>> (DEVINDEX_HASHBITS
* 2))) & mask
;
246 /* Check, that the gateway is already configured.
247 Used only by redirect accept routine.
250 int ip_fib_check_default(__be32 gw
, struct net_device
*dev
)
252 struct hlist_head
*head
;
253 struct hlist_node
*node
;
257 spin_lock(&fib_info_lock
);
259 hash
= fib_devindex_hashfn(dev
->ifindex
);
260 head
= &fib_info_devhash
[hash
];
261 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
262 if (nh
->nh_dev
== dev
&&
264 !(nh
->nh_flags
&RTNH_F_DEAD
)) {
265 spin_unlock(&fib_info_lock
);
270 spin_unlock(&fib_info_lock
);
275 static inline size_t fib_nlmsg_size(struct fib_info
*fi
)
277 size_t payload
= NLMSG_ALIGN(sizeof(struct rtmsg
))
278 + nla_total_size(4) /* RTA_TABLE */
279 + nla_total_size(4) /* RTA_DST */
280 + nla_total_size(4) /* RTA_PRIORITY */
281 + nla_total_size(4); /* RTA_PREFSRC */
283 /* space for nested metrics */
284 payload
+= nla_total_size((RTAX_MAX
* nla_total_size(4)));
287 /* Also handles the special case fib_nhs == 1 */
289 /* each nexthop is packed in an attribute */
290 size_t nhsize
= nla_total_size(sizeof(struct rtnexthop
));
292 /* may contain flow and gateway attribute */
293 nhsize
+= 2 * nla_total_size(4);
295 /* all nexthops are packed in a nested attribute */
296 payload
+= nla_total_size(fi
->fib_nhs
* nhsize
);
302 void rtmsg_fib(int event
, __be32 key
, struct fib_alias
*fa
,
303 int dst_len
, u32 tb_id
, struct nl_info
*info
,
304 unsigned int nlm_flags
)
307 u32 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
310 skb
= nlmsg_new(fib_nlmsg_size(fa
->fa_info
), GFP_KERNEL
);
314 err
= fib_dump_info(skb
, info
->pid
, seq
, event
, tb_id
,
315 fa
->fa_type
, fa
->fa_scope
, key
, dst_len
,
316 fa
->fa_tos
, fa
->fa_info
, nlm_flags
);
318 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319 WARN_ON(err
== -EMSGSIZE
);
323 err
= rtnl_notify(skb
, info
->pid
, RTNLGRP_IPV4_ROUTE
,
324 info
->nlh
, GFP_KERNEL
);
327 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE
, err
);
330 /* Return the first fib alias matching TOS with
331 * priority less than or equal to PRIO.
333 struct fib_alias
*fib_find_alias(struct list_head
*fah
, u8 tos
, u32 prio
)
336 struct fib_alias
*fa
;
337 list_for_each_entry(fa
, fah
, fa_list
) {
338 if (fa
->fa_tos
> tos
)
340 if (fa
->fa_info
->fib_priority
>= prio
||
348 int fib_detect_death(struct fib_info
*fi
, int order
,
349 struct fib_info
**last_resort
, int *last_idx
, int *dflt
)
352 int state
= NUD_NONE
;
354 n
= neigh_lookup(&arp_tbl
, &fi
->fib_nh
[0].nh_gw
, fi
->fib_dev
);
356 state
= n
->nud_state
;
359 if (state
==NUD_REACHABLE
)
361 if ((state
&NUD_VALID
) && order
!= *dflt
)
363 if ((state
&NUD_VALID
) ||
364 (*last_idx
<0 && order
> *dflt
)) {
371 #ifdef CONFIG_IP_ROUTE_MULTIPATH
373 static int fib_count_nexthops(struct rtnexthop
*rtnh
, int remaining
)
377 while (rtnh_ok(rtnh
, remaining
)) {
379 rtnh
= rtnh_next(rtnh
, &remaining
);
382 /* leftover implies invalid nexthop configuration, discard it */
383 return remaining
> 0 ? 0 : nhs
;
386 static int fib_get_nhs(struct fib_info
*fi
, struct rtnexthop
*rtnh
,
387 int remaining
, struct fib_config
*cfg
)
389 change_nexthops(fi
) {
392 if (!rtnh_ok(rtnh
, remaining
))
395 nh
->nh_flags
= (cfg
->fc_flags
& ~0xFF) | rtnh
->rtnh_flags
;
396 nh
->nh_oif
= rtnh
->rtnh_ifindex
;
397 nh
->nh_weight
= rtnh
->rtnh_hops
+ 1;
399 attrlen
= rtnh_attrlen(rtnh
);
401 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
403 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
404 nh
->nh_gw
= nla
? nla_get_be32(nla
) : 0;
405 #ifdef CONFIG_NET_CLS_ROUTE
406 nla
= nla_find(attrs
, attrlen
, RTA_FLOW
);
407 nh
->nh_tclassid
= nla
? nla_get_u32(nla
) : 0;
411 rtnh
= rtnh_next(rtnh
, &remaining
);
412 } endfor_nexthops(fi
);
419 int fib_nh_match(struct fib_config
*cfg
, struct fib_info
*fi
)
421 #ifdef CONFIG_IP_ROUTE_MULTIPATH
422 struct rtnexthop
*rtnh
;
426 if (cfg
->fc_priority
&& cfg
->fc_priority
!= fi
->fib_priority
)
429 if (cfg
->fc_oif
|| cfg
->fc_gw
) {
430 if ((!cfg
->fc_oif
|| cfg
->fc_oif
== fi
->fib_nh
->nh_oif
) &&
431 (!cfg
->fc_gw
|| cfg
->fc_gw
== fi
->fib_nh
->nh_gw
))
436 #ifdef CONFIG_IP_ROUTE_MULTIPATH
437 if (cfg
->fc_mp
== NULL
)
441 remaining
= cfg
->fc_mp_len
;
446 if (!rtnh_ok(rtnh
, remaining
))
449 if (rtnh
->rtnh_ifindex
&& rtnh
->rtnh_ifindex
!= nh
->nh_oif
)
452 attrlen
= rtnh_attrlen(rtnh
);
454 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
456 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
457 if (nla
&& nla_get_be32(nla
) != nh
->nh_gw
)
459 #ifdef CONFIG_NET_CLS_ROUTE
460 nla
= nla_find(attrs
, attrlen
, RTA_FLOW
);
461 if (nla
&& nla_get_u32(nla
) != nh
->nh_tclassid
)
466 rtnh
= rtnh_next(rtnh
, &remaining
);
467 } endfor_nexthops(fi
);
/*
   Picture
   -------

   Semantics of nexthop is very messy by historical reasons.
   We have to take into account, that:
   a) gateway can be actually local interface address,
      so that gatewayed route is direct.
   b) gateway must be on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) If both gateway and interface are specified, they should not
      contradict.
   d) If we use tunnel routes, gateway could be not on-link.

   Attempt to reconcile all of these (alas, self-contradictory) conditions
   results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of code does not increase practically, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is local address,
   "link" is direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   Code is still hairy, but now it is apparently logically
   consistent and very flexible. F.e. as by-product it allows
   to co-exist in peace independent exterior and interior
   routing processes.

   Normally it looks as following.

   {universe prefix}  -> (gw, oif) [scope link]
			  |
			  |-> {link prefix} -> (gw, oif) [scope local]
						|
						|-> {local prefix} (terminal node)
 */
517 static int fib_check_nh(struct fib_config
*cfg
, struct fib_info
*fi
,
523 struct fib_result res
;
525 #ifdef CONFIG_IP_ROUTE_PERVASIVE
526 if (nh
->nh_flags
&RTNH_F_PERVASIVE
)
529 if (nh
->nh_flags
&RTNH_F_ONLINK
) {
530 struct net_device
*dev
;
532 if (cfg
->fc_scope
>= RT_SCOPE_LINK
)
534 if (inet_addr_type(nh
->nh_gw
) != RTN_UNICAST
)
536 if ((dev
= __dev_get_by_index(&init_net
, nh
->nh_oif
)) == NULL
)
538 if (!(dev
->flags
&IFF_UP
))
542 nh
->nh_scope
= RT_SCOPE_LINK
;
550 .scope
= cfg
->fc_scope
+ 1,
556 /* It is not necessary, but requires a bit of thinking */
557 if (fl
.fl4_scope
< RT_SCOPE_LINK
)
558 fl
.fl4_scope
= RT_SCOPE_LINK
;
559 if ((err
= fib_lookup(&fl
, &res
)) != 0)
563 if (res
.type
!= RTN_UNICAST
&& res
.type
!= RTN_LOCAL
)
565 nh
->nh_scope
= res
.scope
;
566 nh
->nh_oif
= FIB_RES_OIF(res
);
567 if ((nh
->nh_dev
= FIB_RES_DEV(res
)) == NULL
)
569 dev_hold(nh
->nh_dev
);
571 if (!(nh
->nh_dev
->flags
& IFF_UP
))
578 struct in_device
*in_dev
;
580 if (nh
->nh_flags
&(RTNH_F_PERVASIVE
|RTNH_F_ONLINK
))
583 in_dev
= inetdev_by_index(nh
->nh_oif
);
586 if (!(in_dev
->dev
->flags
&IFF_UP
)) {
590 nh
->nh_dev
= in_dev
->dev
;
591 dev_hold(nh
->nh_dev
);
592 nh
->nh_scope
= RT_SCOPE_HOST
;
598 static inline unsigned int fib_laddr_hashfn(__be32 val
)
600 unsigned int mask
= (fib_hash_size
- 1);
602 return ((__force u32
)val
^ ((__force u32
)val
>> 7) ^ ((__force u32
)val
>> 14)) & mask
;
605 static struct hlist_head
*fib_hash_alloc(int bytes
)
607 if (bytes
<= PAGE_SIZE
)
608 return kmalloc(bytes
, GFP_KERNEL
);
610 return (struct hlist_head
*)
611 __get_free_pages(GFP_KERNEL
, get_order(bytes
));
614 static void fib_hash_free(struct hlist_head
*hash
, int bytes
)
619 if (bytes
<= PAGE_SIZE
)
622 free_pages((unsigned long) hash
, get_order(bytes
));
625 static void fib_hash_move(struct hlist_head
*new_info_hash
,
626 struct hlist_head
*new_laddrhash
,
627 unsigned int new_size
)
629 struct hlist_head
*old_info_hash
, *old_laddrhash
;
630 unsigned int old_size
= fib_hash_size
;
631 unsigned int i
, bytes
;
633 spin_lock_bh(&fib_info_lock
);
634 old_info_hash
= fib_info_hash
;
635 old_laddrhash
= fib_info_laddrhash
;
636 fib_hash_size
= new_size
;
638 for (i
= 0; i
< old_size
; i
++) {
639 struct hlist_head
*head
= &fib_info_hash
[i
];
640 struct hlist_node
*node
, *n
;
643 hlist_for_each_entry_safe(fi
, node
, n
, head
, fib_hash
) {
644 struct hlist_head
*dest
;
645 unsigned int new_hash
;
647 hlist_del(&fi
->fib_hash
);
649 new_hash
= fib_info_hashfn(fi
);
650 dest
= &new_info_hash
[new_hash
];
651 hlist_add_head(&fi
->fib_hash
, dest
);
654 fib_info_hash
= new_info_hash
;
656 for (i
= 0; i
< old_size
; i
++) {
657 struct hlist_head
*lhead
= &fib_info_laddrhash
[i
];
658 struct hlist_node
*node
, *n
;
661 hlist_for_each_entry_safe(fi
, node
, n
, lhead
, fib_lhash
) {
662 struct hlist_head
*ldest
;
663 unsigned int new_hash
;
665 hlist_del(&fi
->fib_lhash
);
667 new_hash
= fib_laddr_hashfn(fi
->fib_prefsrc
);
668 ldest
= &new_laddrhash
[new_hash
];
669 hlist_add_head(&fi
->fib_lhash
, ldest
);
672 fib_info_laddrhash
= new_laddrhash
;
674 spin_unlock_bh(&fib_info_lock
);
676 bytes
= old_size
* sizeof(struct hlist_head
*);
677 fib_hash_free(old_info_hash
, bytes
);
678 fib_hash_free(old_laddrhash
, bytes
);
681 struct fib_info
*fib_create_info(struct fib_config
*cfg
)
684 struct fib_info
*fi
= NULL
;
685 struct fib_info
*ofi
;
688 /* Fast check to catch the most weird cases */
689 if (fib_props
[cfg
->fc_type
].scope
> cfg
->fc_scope
)
692 #ifdef CONFIG_IP_ROUTE_MULTIPATH
694 nhs
= fib_count_nexthops(cfg
->fc_mp
, cfg
->fc_mp_len
);
701 if (fib_info_cnt
>= fib_hash_size
) {
702 unsigned int new_size
= fib_hash_size
<< 1;
703 struct hlist_head
*new_info_hash
;
704 struct hlist_head
*new_laddrhash
;
709 bytes
= new_size
* sizeof(struct hlist_head
*);
710 new_info_hash
= fib_hash_alloc(bytes
);
711 new_laddrhash
= fib_hash_alloc(bytes
);
712 if (!new_info_hash
|| !new_laddrhash
) {
713 fib_hash_free(new_info_hash
, bytes
);
714 fib_hash_free(new_laddrhash
, bytes
);
716 memset(new_info_hash
, 0, bytes
);
717 memset(new_laddrhash
, 0, bytes
);
719 fib_hash_move(new_info_hash
, new_laddrhash
, new_size
);
726 fi
= kzalloc(sizeof(*fi
)+nhs
*sizeof(struct fib_nh
), GFP_KERNEL
);
731 fi
->fib_protocol
= cfg
->fc_protocol
;
732 fi
->fib_flags
= cfg
->fc_flags
;
733 fi
->fib_priority
= cfg
->fc_priority
;
734 fi
->fib_prefsrc
= cfg
->fc_prefsrc
;
737 change_nexthops(fi
) {
739 } endfor_nexthops(fi
)
745 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
746 int type
= nla_type(nla
);
751 fi
->fib_metrics
[type
- 1] = nla_get_u32(nla
);
757 #ifdef CONFIG_IP_ROUTE_MULTIPATH
758 err
= fib_get_nhs(fi
, cfg
->fc_mp
, cfg
->fc_mp_len
, cfg
);
761 if (cfg
->fc_oif
&& fi
->fib_nh
->nh_oif
!= cfg
->fc_oif
)
763 if (cfg
->fc_gw
&& fi
->fib_nh
->nh_gw
!= cfg
->fc_gw
)
765 #ifdef CONFIG_NET_CLS_ROUTE
766 if (cfg
->fc_flow
&& fi
->fib_nh
->nh_tclassid
!= cfg
->fc_flow
)
773 struct fib_nh
*nh
= fi
->fib_nh
;
775 nh
->nh_oif
= cfg
->fc_oif
;
776 nh
->nh_gw
= cfg
->fc_gw
;
777 nh
->nh_flags
= cfg
->fc_flags
;
778 #ifdef CONFIG_NET_CLS_ROUTE
779 nh
->nh_tclassid
= cfg
->fc_flow
;
781 #ifdef CONFIG_IP_ROUTE_MULTIPATH
786 if (fib_props
[cfg
->fc_type
].error
) {
787 if (cfg
->fc_gw
|| cfg
->fc_oif
|| cfg
->fc_mp
)
792 if (cfg
->fc_scope
> RT_SCOPE_HOST
)
795 if (cfg
->fc_scope
== RT_SCOPE_HOST
) {
796 struct fib_nh
*nh
= fi
->fib_nh
;
798 /* Local address is added. */
799 if (nhs
!= 1 || nh
->nh_gw
)
801 nh
->nh_scope
= RT_SCOPE_NOWHERE
;
802 nh
->nh_dev
= dev_get_by_index(&init_net
, fi
->fib_nh
->nh_oif
);
804 if (nh
->nh_dev
== NULL
)
807 change_nexthops(fi
) {
808 if ((err
= fib_check_nh(cfg
, fi
, nh
)) != 0)
810 } endfor_nexthops(fi
)
813 if (fi
->fib_prefsrc
) {
814 if (cfg
->fc_type
!= RTN_LOCAL
|| !cfg
->fc_dst
||
815 fi
->fib_prefsrc
!= cfg
->fc_dst
)
816 if (inet_addr_type(fi
->fib_prefsrc
) != RTN_LOCAL
)
821 if ((ofi
= fib_find_info(fi
)) != NULL
) {
829 atomic_inc(&fi
->fib_clntref
);
830 spin_lock_bh(&fib_info_lock
);
831 hlist_add_head(&fi
->fib_hash
,
832 &fib_info_hash
[fib_info_hashfn(fi
)]);
833 if (fi
->fib_prefsrc
) {
834 struct hlist_head
*head
;
836 head
= &fib_info_laddrhash
[fib_laddr_hashfn(fi
->fib_prefsrc
)];
837 hlist_add_head(&fi
->fib_lhash
, head
);
839 change_nexthops(fi
) {
840 struct hlist_head
*head
;
845 hash
= fib_devindex_hashfn(nh
->nh_dev
->ifindex
);
846 head
= &fib_info_devhash
[hash
];
847 hlist_add_head(&nh
->nh_hash
, head
);
848 } endfor_nexthops(fi
)
849 spin_unlock_bh(&fib_info_lock
);
864 /* Note! fib_semantic_match intentionally uses RCU list functions. */
865 int fib_semantic_match(struct list_head
*head
, const struct flowi
*flp
,
866 struct fib_result
*res
, __be32 zone
, __be32 mask
,
869 struct fib_alias
*fa
;
872 list_for_each_entry_rcu(fa
, head
, fa_list
) {
876 fa
->fa_tos
!= flp
->fl4_tos
)
879 if (fa
->fa_scope
< flp
->fl4_scope
)
882 fa
->fa_state
|= FA_S_ACCESSED
;
884 err
= fib_props
[fa
->fa_type
].error
;
886 struct fib_info
*fi
= fa
->fa_info
;
888 if (fi
->fib_flags
& RTNH_F_DEAD
)
891 switch (fa
->fa_type
) {
898 if (nh
->nh_flags
&RTNH_F_DEAD
)
900 if (!flp
->oif
|| flp
->oif
== nh
->nh_oif
)
903 #ifdef CONFIG_IP_ROUTE_MULTIPATH
904 if (nhsel
< fi
->fib_nhs
) {
917 printk(KERN_DEBUG
"impossible 102\n");
926 res
->prefixlen
= prefixlen
;
927 res
->nh_sel
= nh_sel
;
928 res
->type
= fa
->fa_type
;
929 res
->scope
= fa
->fa_scope
;
930 res
->fi
= fa
->fa_info
;
931 atomic_inc(&res
->fi
->fib_clntref
);
935 /* Find appropriate source address to this destination */
937 __be32
__fib_res_prefsrc(struct fib_result
*res
)
939 return inet_select_addr(FIB_RES_DEV(*res
), FIB_RES_GW(*res
), res
->scope
);
942 int fib_dump_info(struct sk_buff
*skb
, u32 pid
, u32 seq
, int event
,
943 u32 tb_id
, u8 type
, u8 scope
, __be32 dst
, int dst_len
, u8 tos
,
944 struct fib_info
*fi
, unsigned int flags
)
946 struct nlmsghdr
*nlh
;
949 nlh
= nlmsg_put(skb
, pid
, seq
, event
, sizeof(*rtm
), flags
);
953 rtm
= nlmsg_data(nlh
);
954 rtm
->rtm_family
= AF_INET
;
955 rtm
->rtm_dst_len
= dst_len
;
956 rtm
->rtm_src_len
= 0;
958 rtm
->rtm_table
= tb_id
;
959 NLA_PUT_U32(skb
, RTA_TABLE
, tb_id
);
960 rtm
->rtm_type
= type
;
961 rtm
->rtm_flags
= fi
->fib_flags
;
962 rtm
->rtm_scope
= scope
;
963 rtm
->rtm_protocol
= fi
->fib_protocol
;
965 if (rtm
->rtm_dst_len
)
966 NLA_PUT_BE32(skb
, RTA_DST
, dst
);
968 if (fi
->fib_priority
)
969 NLA_PUT_U32(skb
, RTA_PRIORITY
, fi
->fib_priority
);
971 if (rtnetlink_put_metrics(skb
, fi
->fib_metrics
) < 0)
972 goto nla_put_failure
;
975 NLA_PUT_BE32(skb
, RTA_PREFSRC
, fi
->fib_prefsrc
);
977 if (fi
->fib_nhs
== 1) {
978 if (fi
->fib_nh
->nh_gw
)
979 NLA_PUT_BE32(skb
, RTA_GATEWAY
, fi
->fib_nh
->nh_gw
);
981 if (fi
->fib_nh
->nh_oif
)
982 NLA_PUT_U32(skb
, RTA_OIF
, fi
->fib_nh
->nh_oif
);
983 #ifdef CONFIG_NET_CLS_ROUTE
984 if (fi
->fib_nh
[0].nh_tclassid
)
985 NLA_PUT_U32(skb
, RTA_FLOW
, fi
->fib_nh
[0].nh_tclassid
);
988 #ifdef CONFIG_IP_ROUTE_MULTIPATH
989 if (fi
->fib_nhs
> 1) {
990 struct rtnexthop
*rtnh
;
993 mp
= nla_nest_start(skb
, RTA_MULTIPATH
);
995 goto nla_put_failure
;
998 rtnh
= nla_reserve_nohdr(skb
, sizeof(*rtnh
));
1000 goto nla_put_failure
;
1002 rtnh
->rtnh_flags
= nh
->nh_flags
& 0xFF;
1003 rtnh
->rtnh_hops
= nh
->nh_weight
- 1;
1004 rtnh
->rtnh_ifindex
= nh
->nh_oif
;
1007 NLA_PUT_BE32(skb
, RTA_GATEWAY
, nh
->nh_gw
);
1008 #ifdef CONFIG_NET_CLS_ROUTE
1009 if (nh
->nh_tclassid
)
1010 NLA_PUT_U32(skb
, RTA_FLOW
, nh
->nh_tclassid
);
1012 /* length of rtnetlink header + attributes */
1013 rtnh
->rtnh_len
= nlmsg_get_pos(skb
) - (void *) rtnh
;
1014 } endfor_nexthops(fi
);
1016 nla_nest_end(skb
, mp
);
1019 return nlmsg_end(skb
, nlh
);
1022 nlmsg_cancel(skb
, nlh
);
1028 - local address disappeared -> we must delete all the entries
1030 - device went down -> we must shutdown all nexthops going via it.
1033 int fib_sync_down(__be32 local
, struct net_device
*dev
, int force
)
1036 int scope
= RT_SCOPE_NOWHERE
;
1041 if (local
&& fib_info_laddrhash
) {
1042 unsigned int hash
= fib_laddr_hashfn(local
);
1043 struct hlist_head
*head
= &fib_info_laddrhash
[hash
];
1044 struct hlist_node
*node
;
1045 struct fib_info
*fi
;
1047 hlist_for_each_entry(fi
, node
, head
, fib_lhash
) {
1048 if (fi
->fib_prefsrc
== local
) {
1049 fi
->fib_flags
|= RTNH_F_DEAD
;
1056 struct fib_info
*prev_fi
= NULL
;
1057 unsigned int hash
= fib_devindex_hashfn(dev
->ifindex
);
1058 struct hlist_head
*head
= &fib_info_devhash
[hash
];
1059 struct hlist_node
*node
;
1062 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
1063 struct fib_info
*fi
= nh
->nh_parent
;
1066 BUG_ON(!fi
->fib_nhs
);
1067 if (nh
->nh_dev
!= dev
|| fi
== prev_fi
)
1071 change_nexthops(fi
) {
1072 if (nh
->nh_flags
&RTNH_F_DEAD
)
1074 else if (nh
->nh_dev
== dev
&&
1075 nh
->nh_scope
!= scope
) {
1076 nh
->nh_flags
|= RTNH_F_DEAD
;
1077 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1078 spin_lock_bh(&fib_multipath_lock
);
1079 fi
->fib_power
-= nh
->nh_power
;
1081 spin_unlock_bh(&fib_multipath_lock
);
1085 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1086 if (force
> 1 && nh
->nh_dev
== dev
) {
1091 } endfor_nexthops(fi
)
1092 if (dead
== fi
->fib_nhs
) {
1093 fi
->fib_flags
|= RTNH_F_DEAD
;
1102 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1105 Dead device goes up. We wake up dead nexthops.
1106 It takes sense only on multipath routes.
1109 int fib_sync_up(struct net_device
*dev
)
1111 struct fib_info
*prev_fi
;
1113 struct hlist_head
*head
;
1114 struct hlist_node
*node
;
1118 if (!(dev
->flags
&IFF_UP
))
1122 hash
= fib_devindex_hashfn(dev
->ifindex
);
1123 head
= &fib_info_devhash
[hash
];
1126 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
1127 struct fib_info
*fi
= nh
->nh_parent
;
1130 BUG_ON(!fi
->fib_nhs
);
1131 if (nh
->nh_dev
!= dev
|| fi
== prev_fi
)
1136 change_nexthops(fi
) {
1137 if (!(nh
->nh_flags
&RTNH_F_DEAD
)) {
1141 if (nh
->nh_dev
== NULL
|| !(nh
->nh_dev
->flags
&IFF_UP
))
1143 if (nh
->nh_dev
!= dev
|| !__in_dev_get_rtnl(dev
))
1146 spin_lock_bh(&fib_multipath_lock
);
1148 nh
->nh_flags
&= ~RTNH_F_DEAD
;
1149 spin_unlock_bh(&fib_multipath_lock
);
1150 } endfor_nexthops(fi
)
1153 fi
->fib_flags
&= ~RTNH_F_DEAD
;
1162 The algorithm is suboptimal, but it provides really
1163 fair weighted route distribution.
1166 void fib_select_multipath(const struct flowi
*flp
, struct fib_result
*res
)
1168 struct fib_info
*fi
= res
->fi
;
1171 spin_lock_bh(&fib_multipath_lock
);
1172 if (fi
->fib_power
<= 0) {
1174 change_nexthops(fi
) {
1175 if (!(nh
->nh_flags
&RTNH_F_DEAD
)) {
1176 power
+= nh
->nh_weight
;
1177 nh
->nh_power
= nh
->nh_weight
;
1179 } endfor_nexthops(fi
);
1180 fi
->fib_power
= power
;
1182 spin_unlock_bh(&fib_multipath_lock
);
1183 /* Race condition: route has just become dead. */
1190 /* w should be random number [0..fi->fib_power-1],
1191 it is pretty bad approximation.
1194 w
= jiffies
% fi
->fib_power
;
1196 change_nexthops(fi
) {
1197 if (!(nh
->nh_flags
&RTNH_F_DEAD
) && nh
->nh_power
) {
1198 if ((w
-= nh
->nh_power
) <= 0) {
1201 res
->nh_sel
= nhsel
;
1202 spin_unlock_bh(&fib_multipath_lock
);
1206 } endfor_nexthops(fi
);
1208 /* Race condition: route has just become dead. */
1210 spin_unlock_bh(&fib_multipath_lock
);