2 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
5 * Copyright (c) 1983, 1988, 1993
6 * The Regents of the University of California. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgment:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * $FreeBSD: src/sbin/routed/table.c,v 1.15 2000/08/11 08:24:38 sheldonh Exp $
42 #include <sys/tihdr.h>
43 #include <inet/mib2.h>
46 /* This structure is used to store a disassembled routing socket message. */
49 struct sockaddr_storage
*rti_info
[RTAX_MAX
];
52 static struct rt_spare
*rts_better(struct rt_entry
*);
53 static struct rt_spare rts_empty
= EMPTY_RT_SPARE
;
54 static void set_need_flash(void);
55 static void rtbad(struct rt_entry
*, struct interface
*);
56 static int rt_xaddrs(struct rt_addrinfo
*, struct sockaddr_storage
*,
58 static struct interface
*gwkludge_iflookup(in_addr_t
, in_addr_t
, in_addr_t
);
59 static struct interface
*lifp_iflookup(in_addr_t
, const char *);
61 struct radix_node_head
*rhead
; /* root of the radix tree */
63 /* Flash update needed. _B_TRUE to suppress the 1st. */
64 boolean_t need_flash
= _B_TRUE
;
66 struct timeval age_timer
; /* next check of old routes */
67 struct timeval need_kern
= { /* need to update kernel table */
68 EPOCH
+MIN_WAITTIME
-1, 0
71 static uint32_t total_routes
;
/*
 * Round a up to the next multiple of sizeof (long); a value that is not
 * greater than zero yields sizeof (long).  The OR with (sizeof (long) - 1)
 * relies on sizeof (long) being a power of two.  The argument is evaluated
 * twice, so it must be free of side effects.
 */
73 #define ROUNDUP_LONG(a) \
74 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long))
77 * It is desirable to "aggregate" routes, to combine differing routes of
78 * the same metric and next hop into a common route with a smaller netmask
79 * or to suppress redundant routes, routes that add no information to
80 * routes with smaller netmasks.
82 * A route is redundant if and only if any and all routes with smaller
83 * but matching netmasks and nets are the same. Since routes are
84 * kept sorted in the radix tree, redundant routes always come second.
86 * There are two kinds of aggregations. First, two routes of the same bit
87 * mask and differing only in the least significant bit of the network
88 * number can be combined into a single route with a coarser mask.
90 * Second, a route can be suppressed in favor of another route with a more
91 * coarse mask provided no incompatible routes with intermediate masks
92 * are present. The second kind of aggregation involves suppressing routes.
93 * A route must not be suppressed if an incompatible route exists with
94 * an intermediate mask, since the suppressed route would be covered
95 * by the intermediate.
97 * This code relies on the radix tree walk encountering routes
98 * sorted first by address, with the smallest address first.
101 static struct ag_info ag_slots
[NUM_AG_SLOTS
], *ag_avail
, *ag_corsest
,
105 #define CHECK_AG() do { int acnt = 0; struct ag_info *cag; \
106 for (cag = ag_avail; cag != NULL; cag = cag->ag_fine) \
108 for (cag = ag_corsest; cag != NULL; cag = cag->ag_fine) \
110 if (acnt != NUM_AG_SLOTS) \
114 #define CHECK_AG() (void)0
119 * Output the contents of an aggregation table slot.
120 * This function must always be immediately followed with the deletion
121 * of the target slot.
124 ag_out(struct ag_info
*ag
, void (*out
)(struct ag_info
*))
126 struct ag_info
*ag_cors
;
130 /* Forget it if this route should not be output for split-horizon. */
131 if (ag
->ag_state
& AGS_SPLIT_HZ
)
135 * If we output both the even and odd twins, then the immediate parent,
136 * if it is present, is redundant, unless the parent manages to
137 * aggregate into something coarser.
138 * On successive calls, this code detects the even and odd twins,
139 * and marks the parent.
141 * Note that the order in which the radix tree code emits routes
142 * ensures that the twins are seen before the parent is emitted.
144 ag_cors
= ag
->ag_cors
;
145 if (ag_cors
!= NULL
&&
146 ag_cors
->ag_mask
== (ag
->ag_mask
<< 1) &&
147 ag_cors
->ag_dst_h
== (ag
->ag_dst_h
& ag_cors
->ag_mask
)) {
148 ag_cors
->ag_state
|= ((ag_cors
->ag_dst_h
== ag
->ag_dst_h
) ?
149 AGS_REDUN0
: AGS_REDUN1
);
153 * Skip it if this route is itself redundant.
155 * It is ok to change the contents of the slot here, since it is
156 * always deleted next.
158 if (ag
->ag_state
& AGS_REDUN0
) {
159 if (ag
->ag_state
& AGS_REDUN1
)
160 return; /* quit if fully redundant */
161 /* make it finer if it is half-redundant */
162 bit
= (-ag
->ag_mask
) >> 1;
166 } else if (ag
->ag_state
& AGS_REDUN1
) {
167 /* make it finer if it is half-redundant */
168 bit
= (-ag
->ag_mask
) >> 1;
/*
 * Unlink an aggregation slot from the ag_cors/ag_fine chain, fixing up
 * ag_corsest or ag_finest when the slot sat at that end of the chain, and
 * point the slot at the current head of the ag_avail list.
 */
176 ag_del(struct ag_info
*ag
)
/* No coarser neighbor: the finer neighbor becomes the coarsest slot. */
180 if (ag
->ag_cors
== NULL
)
181 ag_corsest
= ag
->ag_fine
;
/* The coarser neighbor skips over ag to reach the finer neighbor. */
183 ag
->ag_cors
->ag_fine
= ag
->ag_fine
;
/* No finer neighbor: the coarser neighbor becomes the finest slot. */
185 if (ag
->ag_fine
== NULL
)
186 ag_finest
= ag
->ag_cors
;
/* The finer neighbor skips over ag to reach the coarser neighbor. */
188 ag
->ag_fine
->ag_cors
= ag
->ag_cors
;
/* Chain the slot in front of the available list. */
190 ag
->ag_fine
= ag_avail
;
197 /* Look for a route that can suppress the given route. */
/*
 * Walk from ag toward coarser slots through the ag_cors links.  A slot
 * covers ag when ag's destination, masked with the slot's ag_mask, equals
 * the slot's ag_dst_h; a covering slot is then tested against ag's
 * preference, next hop, interface and state flags.
 */
198 static struct ag_info
*
199 ag_find_suppressor(struct ag_info
*ag
)
201 struct ag_info
*ag_cors
;
202 in_addr_t dst_h
= ag
->ag_dst_h
;
/* Visit every slot coarser than ag, nearest first. */
204 for (ag_cors
= ag
->ag_cors
; ag_cors
!= NULL
;
205 ag_cors
= ag_cors
->ag_cors
) {
/* Does this coarser slot cover the target destination? */
207 if ((dst_h
& ag_cors
->ag_mask
) == ag_cors
->ag_dst_h
) {
209 * We found a route with a coarser mask that covers
210 * the given target. It can suppress the target
211 * only if it has a good enough metric and it
212 * either has the same (gateway, ifp), or if its state
213 * includes AGS_CORS_GATE or the target's state
214 * includes AGS_FINE_GATE.
216 if (ag_cors
->ag_pref
<= ag
->ag_pref
&&
217 (((ag
->ag_nhop
== ag_cors
->ag_nhop
) &&
218 (ag
->ag_ifp
== ag_cors
->ag_ifp
)) ||
219 ag_cors
->ag_state
& AGS_CORS_GATE
||
220 ag
->ag_state
& AGS_FINE_GATE
)) {
231 * Flush routes waiting for aggregation.
232 * This must not suppress a route unless it is known that among all routes
233 * with coarser masks that match it, the one with the longest mask is
234 * appropriate. This is ensured by scanning the routes in lexical order,
235 * and with the most restrictive mask first among routes to the same
239 ag_flush(in_addr_t lim_dst_h
, /* flush routes to here */
240 in_addr_t lim_mask
, /* matching this mask */
241 void (*out
)(struct ag_info
*))
243 struct ag_info
*ag
, *ag_cors
, *ag_supr
;
247 for (ag
= ag_finest
; ag
!= NULL
&& ag
->ag_mask
>= lim_mask
;
249 /* Get the next route now, before we delete ag. */
250 ag_cors
= ag
->ag_cors
;
252 /* Work on only the specified routes. */
253 dst_h
= ag
->ag_dst_h
;
254 if ((dst_h
& lim_mask
) != lim_dst_h
)
258 * Don't try to suppress the route if its state doesn't
259 * include AGS_SUPPRESS.
261 if (!(ag
->ag_state
& AGS_SUPPRESS
)) {
267 ag_supr
= ag_find_suppressor(ag
);
268 if (ag_supr
== NULL
) {
270 * We didn't find a route which suppresses the
271 * target, so the target can go out.
276 * We found a route which suppresses the target, so
277 * don't output the target.
280 trace_misc("aggregated away %s",
281 rtname(htonl(ag
->ag_dst_h
), ag
->ag_mask
,
283 trace_misc("on coarser route %s",
284 rtname(htonl(ag_supr
->ag_dst_h
),
285 ag_supr
->ag_mask
, ag_supr
->ag_nhop
));
288 * If the suppressed target was redundant, then
289 * mark the suppressor as redundant.
291 if (AG_IS_REDUN(ag
->ag_state
) &&
292 ag_supr
->ag_mask
== (ag
->ag_mask
<<1)) {
293 if (ag_supr
->ag_dst_h
== dst_h
)
294 ag_supr
->ag_state
|= AGS_REDUN0
;
296 ag_supr
->ag_state
|= AGS_REDUN1
;
298 if (ag
->ag_tag
!= ag_supr
->ag_tag
)
300 if (ag
->ag_nhop
!= ag_supr
->ag_nhop
)
301 ag_supr
->ag_nhop
= 0;
304 /* The route has either been output or suppressed */
312 /* Try to aggregate a route with previous routes. */
314 ag_check(in_addr_t dst
,
317 struct interface
*ifp
,
324 void (*out
)(struct ag_info
*)) /* output using this */
326 struct ag_info
*ag
, *nag
, *ag_cors
;
329 struct interface
*xifp
;
334 * Don't bother trying to aggregate routes with non-contiguous
337 * (X & -X) isolates the lowest set bit of X (it is zero only if X is zero).
338 * (X + (X & -X)) == 0 if and only if X is a contiguous high-order mask (or 0).
340 if ((mask
& -mask
) + mask
!= 0) {
341 struct ag_info nc_ag
;
343 nc_ag
.ag_dst_h
= dst
;
344 nc_ag
.ag_mask
= mask
;
345 nc_ag
.ag_gate
= gate
;
347 nc_ag
.ag_nhop
= nhop
;
348 nc_ag
.ag_metric
= metric
;
349 nc_ag
.ag_pref
= pref
;
351 nc_ag
.ag_state
= state
;
352 nc_ag
.ag_seqno
= seqno
;
357 /* Search for the right slot in the aggregation table. */
361 if (ag
->ag_mask
>= mask
)
365 * Suppress old routes (i.e. combine with compatible routes
366 * with coarser masks) as we look for the right slot in the
367 * aggregation table for the new route.
368 * A route to an address less than the current destination
369 * will not be affected by the current route or any route
370 * seen hereafter. That means it is safe to suppress it.
371 * This check keeps poor routes (e.g. with large hop counts)
372 * from preventing suppression of finer routes.
374 if (ag_cors
!= NULL
&& ag
->ag_dst_h
< dst
&&
375 (ag
->ag_state
& AGS_SUPPRESS
) &&
376 ag_cors
->ag_pref
<= ag
->ag_pref
&&
377 (ag
->ag_dst_h
& ag_cors
->ag_mask
) == ag_cors
->ag_dst_h
&&
378 ((ag_cors
->ag_nhop
== ag
->ag_nhop
&&
379 (ag_cors
->ag_ifp
== ag
->ag_ifp
))||
380 (ag
->ag_state
& AGS_FINE_GATE
) ||
381 (ag_cors
->ag_state
& AGS_CORS_GATE
))) {
383 * If the suppressed target was redundant,
384 * then mark the suppressor redundant.
386 if (AG_IS_REDUN(ag
->ag_state
) &&
387 ag_cors
->ag_mask
== (ag
->ag_mask
<< 1)) {
388 if (ag_cors
->ag_dst_h
== dst
)
389 ag_cors
->ag_state
|= AGS_REDUN0
;
391 ag_cors
->ag_state
|= AGS_REDUN1
;
393 if (ag
->ag_tag
!= ag_cors
->ag_tag
)
395 if (ag
->ag_nhop
!= ag_cors
->ag_nhop
)
396 ag_cors
->ag_nhop
= 0;
402 ag
= ag_cors
->ag_fine
;
406 * If we find the even/odd twin of the new route, and if the
407 * masks and so forth are equal, we can aggregate them.
408 * We can probably promote one of the pair.
410 * Since the routes are encountered in lexical order,
411 * the new route must be odd. However, the second or later
412 * times around this loop, it could be the even twin promoted
413 * from the even/odd pair of twins of the finer route.
415 while (ag
!= NULL
&& ag
->ag_mask
== mask
&&
416 ((ag
->ag_dst_h
^ dst
) & (mask
<<1)) == 0) {
419 * Here we know the target route and the route in the current
420 * slot have the same netmasks and differ by at most the
421 * last bit. They are either for the same destination, or
422 * for an even/odd pair of destinations.
424 if (ag
->ag_dst_h
== dst
) {
425 if (ag
->ag_nhop
== nhop
&& ag
->ag_ifp
== ifp
) {
427 * We have two routes to the same destination,
428 * with the same nexthop and interface.
429 * Routes are encountered in lexical order,
430 * so a route is never promoted until the
431 * parent route is already present. So we
432 * know that the new route is a promoted (or
433 * aggregated) pair and the route already in
434 * the slot is the explicit route.
436 * Prefer the best route if their metrics
437 * differ, or the aggregated one if not,
438 * following a sort of longest-match rule.
440 if (pref
<= ag
->ag_pref
) {
445 ag
->ag_metric
= metric
;
447 if (seqno
> ag
->ag_seqno
)
448 ag
->ag_seqno
= seqno
;
450 ag
->ag_state
= state
;
455 * Some bits are set if they are set on
456 * either route, except when the route is
459 if (!(ag
->ag_state
& AGS_IF
))
461 (state
& (AGS_AGGREGATE_EITHER
|
462 AGS_REDUN0
| AGS_REDUN1
));
467 * multiple routes to same dest/mask with
468 * differing gate, nexthop, or ifp. Flush
476 * If one of the routes can be promoted and the other can
477 * be suppressed, it may be possible to combine them or
478 * worthwhile to promote one.
480 * Any route that can be promoted is always
481 * marked to be eligible to be suppressed.
483 if (!((state
& AGS_AGGREGATE
) &&
484 (ag
->ag_state
& AGS_SUPPRESS
)) &&
485 !((ag
->ag_state
& AGS_AGGREGATE
) && (state
& AGS_SUPPRESS
)))
489 * A pair of even/odd twin routes can be combined
490 * if either is redundant, or if they are via the
491 * same gateway and have the same metric.
493 if (AG_IS_REDUN(ag
->ag_state
) || AG_IS_REDUN(state
) ||
494 (ag
->ag_nhop
== nhop
&& ag
->ag_ifp
== ifp
&&
495 ag
->ag_pref
== pref
&&
496 (state
& ag
->ag_state
& AGS_AGGREGATE
) != 0)) {
499 * We have both the even and odd pairs.
500 * Since the routes are encountered in order,
501 * the route in the slot must be the even twin.
503 * Combine and promote (aggregate) the pair of routes.
505 if (seqno
< ag
->ag_seqno
)
506 seqno
= ag
->ag_seqno
;
507 if (!AG_IS_REDUN(state
))
508 state
&= ~AGS_REDUN1
;
509 if (AG_IS_REDUN(ag
->ag_state
))
512 state
&= ~AGS_REDUN0
;
513 state
|= (ag
->ag_state
& AGS_AGGREGATE_EITHER
);
514 if (ag
->ag_tag
!= tag
)
516 if (ag
->ag_nhop
!= nhop
)
520 * Get rid of the even twin that was already
525 } else if (ag
->ag_pref
>= pref
&&
526 (ag
->ag_state
& AGS_AGGREGATE
)) {
528 * If we cannot combine the pair, maybe the route
529 * with the worse metric can be promoted.
531 * Promote the old, even twin, by giving its slot
532 * in the table to the new, odd twin.
553 * The promoted route is even-redundant only if the
554 * even twin was fully redundant. It is not
555 * odd-redundant because the odd-twin will still be
559 if (!AG_IS_REDUN(tmp
))
562 ag
->ag_state
= state
;
566 ag
->ag_metric
= metric
;
573 /* take the newest sequence number */
574 if (seqno
<= ag
->ag_seqno
)
575 seqno
= ag
->ag_seqno
;
577 ag
->ag_seqno
= seqno
;
580 if (!(state
& AGS_AGGREGATE
))
581 break; /* cannot promote either twin */
584 * Promote the new, odd twin by shaving its
586 * The promoted route is odd-redundant only if the
587 * odd twin was fully redundant. It is not
588 * even-redundant because the even twin is still in
591 if (!AG_IS_REDUN(state
))
592 state
&= ~AGS_REDUN1
;
593 state
&= ~AGS_REDUN0
;
594 if (seqno
< ag
->ag_seqno
)
595 seqno
= ag
->ag_seqno
;
597 ag
->ag_seqno
= seqno
;
603 if (ag_cors
== NULL
) {
608 ag_cors
= ag
->ag_cors
;
612 * When we can no longer promote and combine routes,
613 * flush the old route in the target slot. Also flush
614 * any finer routes that we know will never be aggregated by
617 * In case we moved toward coarser masks,
618 * get back where we belong
620 if (ag
!= NULL
&& ag
->ag_mask
< mask
) {
625 /* Empty the target slot */
626 if (ag
!= NULL
&& ag
->ag_mask
== mask
) {
627 ag_flush(ag
->ag_dst_h
, ag
->ag_mask
, out
);
628 ag
= (ag_cors
== NULL
) ? ag_corsest
: ag_cors
->ag_fine
;
632 if (ag
== NULL
&& ag_cors
!= ag_finest
)
634 if (ag_cors
== NULL
&& ag
!= ag_corsest
)
636 if (ag
!= NULL
&& ag
->ag_cors
!= ag_cors
)
638 if (ag_cors
!= NULL
&& ag_cors
->ag_fine
!= ag
)
643 /* Save the new route on the end of the table. */
645 ag_avail
= nag
->ag_fine
;
652 nag
->ag_metric
= metric
;
655 nag
->ag_state
= state
;
656 nag
->ag_seqno
= seqno
;
663 nag
->ag_cors
= ag_cors
;
667 ag_cors
->ag_fine
= nag
;
/*
 * Map a routing-message type code to a printable name.  Valid codes are
 * 1-based indexes into rtm_types[]; a code of 0, or one past the end of
 * the table, is formatted with NEW_RTM_PAT into the static buffer name0,
 * whose contents are therefore overwritten by the next such call.
 */
673 rtm_type_name(uchar_t type
)
675 static const char *rtm_types
[] = {
693 #define NEW_RTM_PAT "RTM type %#x"
694 static char name0
[sizeof (NEW_RTM_PAT
) + 2];
/* Out-of-table codes are rendered numerically. */
696 if (type
> sizeof (rtm_types
) / sizeof (rtm_types
[0]) || type
== 0) {
697 (void) snprintf(name0
, sizeof (name0
), NEW_RTM_PAT
, type
);
/* Codes start at 1, so the table index is type - 1. */
700 return (rtm_types
[type
-1]);
707 dump_rt_msg(const char *act
, struct rt_msghdr
*rtm
, int mlen
)
712 char buffer
[16*3 + 1], *ibs
;
713 struct ifa_msghdr
*ifam
;
714 struct if_msghdr
*ifm
;
716 switch (rtm
->rtm_type
) {
730 trace_misc("%s %s %d bytes", act
, mtype
, mlen
);
731 if (mlen
> rtm
->rtm_msglen
) {
732 trace_misc("%s: extra %d bytes ignored", mtype
,
733 mlen
- rtm
->rtm_msglen
);
734 mlen
= rtm
->rtm_msglen
;
735 } else if (mlen
< rtm
->rtm_msglen
) {
736 trace_misc("%s: truncated by %d bytes", mtype
,
737 rtm
->rtm_msglen
- mlen
);
739 switch (rtm
->rtm_type
) {
744 ifam
= (struct ifa_msghdr
*)rtm
;
745 trace_misc("ifam: msglen %d version %d type %d addrs %X",
746 ifam
->ifam_msglen
, ifam
->ifam_version
, ifam
->ifam_type
,
748 trace_misc("ifam: flags %X index %d metric %d",
749 ifam
->ifam_flags
, ifam
->ifam_index
, ifam
->ifam_metric
);
750 cp
= (uchar_t
*)(ifam
+ 1);
753 ifm
= (struct if_msghdr
*)rtm
;
754 trace_misc("ifm: msglen %d version %d type %d addrs %X",
755 ifm
->ifm_msglen
, ifm
->ifm_version
, ifm
->ifm_type
,
757 ibs
= if_bit_string(ifm
->ifm_flags
, _B_TRUE
);
759 trace_misc("ifm: flags %#x index %d", ifm
->ifm_flags
,
762 trace_misc("ifm: flags %s index %d", ibs
,
766 cp
= (uchar_t
*)(ifm
+ 1);
769 trace_misc("rtm: msglen %d version %d type %d index %d",
770 rtm
->rtm_msglen
, rtm
->rtm_version
, rtm
->rtm_type
,
772 trace_misc("rtm: flags %X addrs %X pid %d seq %d",
773 rtm
->rtm_flags
, rtm
->rtm_addrs
, rtm
->rtm_pid
, rtm
->rtm_seq
);
774 trace_misc("rtm: errno %d use %d inits %X", rtm
->rtm_errno
,
775 rtm
->rtm_use
, rtm
->rtm_inits
);
776 cp
= (uchar_t
*)(rtm
+ 1);
779 i
= mlen
- (cp
- (uint8_t *)rtm
);
783 for (j
= 0; j
< 16 && i
> 0; j
++, i
--)
784 ibs
+= sprintf(ibs
, " %02X", *cp
++);
785 trace_misc("addr%s", buffer
);
790 * Tell the kernel to add, delete or change a route
791 * Pass k_state from khash in for diagnostic info.
794 rtioctl(int action
, /* RTM_DELETE, etc */
798 struct interface
*ifp
,
802 static int rt_sock_seqno
= 0;
804 struct rt_msghdr w_rtm
;
805 struct sockaddr_in w_dst
;
806 struct sockaddr_in w_gate
;
807 uint8_t w_space
[512];
809 struct sockaddr_in w_mask
;
810 struct sockaddr_dl w_ifp
;
813 #define PAT " %-10s %s metric=%d flags=%#x"
814 #define ARGS rtm_type_name(action), rtname(dst, mask, gate), metric, flags
817 (void) memset(&w
, 0, sizeof (w
));
818 (void) memset(&w_mask
, 0, sizeof (w_mask
));
819 (void) memset(&w_ifp
, 0, sizeof (w_ifp
));
821 w
.w_rtm
.rtm_msglen
= sizeof (struct rt_msghdr
) +
822 2 * ROUNDUP_LONG(sizeof (struct sockaddr_in
));
823 w
.w_rtm
.rtm_version
= RTM_VERSION
;
824 w
.w_rtm
.rtm_type
= action
;
825 w
.w_rtm
.rtm_flags
= flags
;
826 w
.w_rtm
.rtm_seq
= ++rt_sock_seqno
;
827 w
.w_rtm
.rtm_addrs
= RTA_DST
|RTA_GATEWAY
;
828 if (metric
!= 0 || action
== RTM_CHANGE
) {
829 w
.w_rtm
.rtm_rmx
.rmx_hopcount
= metric
;
830 w
.w_rtm
.rtm_inits
|= RTV_HOPCOUNT
;
832 w
.w_dst
.sin_family
= AF_INET
;
833 w
.w_dst
.sin_addr
.s_addr
= dst
;
834 w
.w_gate
.sin_family
= AF_INET
;
835 w
.w_gate
.sin_addr
.s_addr
= gate
;
836 if (mask
== HOST_MASK
) {
837 w
.w_rtm
.rtm_flags
|= RTF_HOST
;
839 w
.w_rtm
.rtm_addrs
|= RTA_NETMASK
;
840 w_mask
.sin_family
= AF_INET
;
841 w_mask
.sin_addr
.s_addr
= htonl(mask
);
842 (void) memmove(cp
, &w_mask
, sizeof (w_mask
));
843 cp
+= ROUNDUP_LONG(sizeof (struct sockaddr_in
));
844 w
.w_rtm
.rtm_msglen
+= ROUNDUP_LONG(sizeof (struct sockaddr_in
));
847 ifp
= iflookup(gate
);
849 if (ifp
== NULL
|| (ifp
->int_phys
== NULL
)) {
850 trace_misc("no ifp for" PAT
, ARGS
);
852 if (ifp
->int_phys
->phyi_index
> UINT16_MAX
) {
853 trace_misc("ifindex %d is too big for sdl_index",
854 ifp
->int_phys
->phyi_index
);
856 w_ifp
.sdl_family
= AF_LINK
;
857 w
.w_rtm
.rtm_addrs
|= RTA_IFP
;
858 w_ifp
.sdl_index
= ifp
->int_phys
->phyi_index
;
859 (void) memmove(cp
, &w_ifp
, sizeof (w_ifp
));
860 w
.w_rtm
.rtm_msglen
+=
861 ROUNDUP_LONG(sizeof (struct sockaddr_dl
));
868 dump_rt_msg("write", &w
.w_rtm
, w
.w_rtm
.rtm_msglen
);
869 cc
= write(rt_sock
, &w
, w
.w_rtm
.rtm_msglen
);
871 if (errno
== ESRCH
&& (action
== RTM_CHANGE
||
872 action
== RTM_DELETE
)) {
873 trace_act("route disappeared before" PAT
, ARGS
);
874 if (action
== RTM_CHANGE
) {
880 writelog(LOG_WARNING
, "write(rt_sock)" PAT
": %s ",
881 ARGS
, rip_strerror(errno
));
883 } else if (cc
!= w
.w_rtm
.rtm_msglen
) {
884 msglog("write(rt_sock) wrote %ld instead of %d for" PAT
,
885 cc
, w
.w_rtm
.rtm_msglen
, ARGS
);
890 trace_misc("write kernel" PAT
, ARGS
);
896 /* Hash table containing our image of the kernel forwarding table. */
/* Number of bins in the kernel-route hash table. */
897 #define KHASH_SIZE 71 /* should be prime */
/*
 * Select the bin for a pair of keys: the two values are XORed and the
 * result is reduced modulo KHASH_SIZE.  The expansion is an lvalue, so
 * callers can take its address.
 */
898 #define KHASH(a, m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
/* Bin heads; the entries in a bin are chained through k_next. */
899 static struct khash
*khash_bins
[KHASH_SIZE
];
901 #define K_KEEP_LIM 30 /* k_keep */
/*
 * Search the hash chain selected by KHASH(dst, mask) for an entry whose
 * k_dst and k_mask equal dst and mask.  A gate of 0 matches any k_gate and
 * a NULL ifp matches any k_ifp.  The chain is walked through a pointer to
 * the link (pk), so pk always addresses the link that refers to the
 * current entry.
 */
903 static struct khash
*
904 kern_find(in_addr_t dst
, in_addr_t mask
, in_addr_t gate
,
905 struct interface
*ifp
, struct khash
***ppk
)
907 struct khash
*k
, **pk
;
/* pk starts at the bin head and then follows each entry's k_next link. */
909 for (pk
= &KHASH(dst
, mask
); (k
= *pk
) != NULL
; pk
= &k
->k_next
) {
/* Exact match on dst/mask; gate and ifp are optional filters. */
910 if (k
->k_dst
== dst
&& k
->k_mask
== mask
&&
911 (gate
== 0 || k
->k_gate
== gate
) &&
912 (ifp
== NULL
|| k
->k_ifp
== ifp
)) {
923 * Find out if there is an alternate route to a given destination
924 * off of a given interface.
/*
 * Walk the hash chain selected by KHASH(dst, mask).  A candidate must have
 * k_dst == dst and k_mask == mask and a k_gate that differs from gate;
 * further conditions follow in the same test.
 */
926 static struct khash
*
927 kern_alternate(in_addr_t dst
, in_addr_t mask
, in_addr_t gate
,
928 struct interface
*ifp
, struct khash
***ppk
)
930 struct khash
*k
, **pk
;
/* pk starts at the bin head and then follows each entry's k_next link. */
932 for (pk
= &KHASH(dst
, mask
); (k
= *pk
) != NULL
; pk
= &k
->k_next
) {
/* Same destination and mask, but reached through a different gateway. */
933 if (k
->k_dst
== dst
&& k
->k_mask
== mask
&&
934 (k
->k_gate
!= gate
) &&
/*
 * Look up (dst, mask, gate, ifp) with kern_find(); a fresh entry is
 * obtained from rtmalloc(), zeroed, and stamped with k_keep = now.tv_sec.
 * NOTE(review): the test between the lookup and the allocation is not
 * shown at this point; presumably the allocation happens only when
 * kern_find() finds nothing -- confirm.
 */
944 static struct khash
*
945 kern_add(in_addr_t dst
, uint32_t mask
, in_addr_t gate
, struct interface
*ifp
)
947 struct khash
*k
, **pk
;
/* pk is handed to kern_find() by address as its ppk argument. */
949 k
= kern_find(dst
, mask
, gate
, ifp
, &pk
);
/* Allocate the new entry; "kern_add" is the tag passed to rtmalloc(). */
953 k
= rtmalloc(sizeof (*k
), "kern_add");
/* Start from an all-zero entry. */
955 (void) memset(k
, 0, sizeof (*k
));
959 k
->k_keep
= now
.tv_sec
;
967 /* delete all khash entries that are wired through the interface ifp */
/*
 * Walk every bin of khash_bins and unlink each entry whose k_ifp equals
 * ifp.
 */
969 kern_flush_ifp(struct interface
*ifp
)
971 struct khash
*k
, *kprev
, *knext
;
974 for (i
= 0; i
< KHASH_SIZE
; i
++) {
/* The walk advances through knext rather than through k->k_next. */
976 for (k
= khash_bins
[i
]; k
!= NULL
; k
= knext
) {
978 if (k
->k_ifp
== ifp
) {
/* Splice k out through its predecessor's link ... */
980 kprev
->k_next
= k
->k_next
;
/* ... or through the bin head. */
982 khash_bins
[i
] = k
->k_next
;
992 * rewire khash entries that currently go through oldifp to
/*
 * Visit every entry in every bin of khash_bins and act on those whose
 * k_ifp equals oldifp; each such entry is reported through trace_misc().
 * NOTE(review): k is taken from khash_bins[], i.e. it is a pointer, yet it
 * is printed with the "%lx" conversion; "%p" would be the matching
 * conversion -- confirm against trace_misc()'s formatting rules.
 */
996 kern_rewire_ifp(struct interface
*oldifp
, struct interface
*newifp
)
1001 for (i
= 0; i
< KHASH_SIZE
; i
++) {
1002 for (k
= khash_bins
[i
]; k
; k
= k
->k_next
) {
/* Only entries currently wired through oldifp are of interest. */
1003 if (k
->k_ifp
== oldifp
) {
1005 trace_misc("kern_rewire_ifp k 0x%lx "
1006 "from %s to %s", k
, oldifp
->int_name
,
1014 * Check that a static route is still in the daemon table, and not
1015 * deleted by interfaces coming and going. This is also the routine
1016 * responsible for adding new static routes to the daemon table.
1019 kern_check_static(struct khash
*k
, struct interface
*ifp
)
1021 struct rt_entry
*rt
;
1022 struct rt_spare
new;
1023 uint16_t rt_state
= RS_STATIC
;
1025 (void) memset(&new, 0, sizeof (new));
1027 new.rts_gate
= k
->k_gate
;
1028 new.rts_router
= (ifp
!= NULL
) ? ifp
->int_addr
: loopaddr
;
1029 new.rts_metric
= k
->k_metric
;
1030 new.rts_time
= now
.tv_sec
;
1031 new.rts_origin
= RO_STATIC
;
1033 rt
= rtget(k
->k_dst
, k
->k_mask
);
1034 if ((ifp
!= NULL
&& !IS_IFF_ROUTING(ifp
->int_if_flags
)) ||
1035 (k
->k_state
& KS_PRIVATE
))
1036 rt_state
|= RS_NOPROPAGATE
;
1039 if ((rt
->rt_state
& RS_STATIC
) == 0) {
1041 * We are already tracking this dest/mask
1042 * via RIP/RDISC. Ignore the static route,
1043 * because we don't currently have a good
1044 * way to compare metrics on static routes
1045 * with rip metrics, and therefore cannot
1046 * mix and match the two.
1050 rt_state
|= rt
->rt_state
;
1051 if (rt
->rt_state
!= rt_state
)
1052 rtchange(rt
, rt_state
, &new, 0);
1054 rtadd(k
->k_dst
, k
->k_mask
, rt_state
, &new);
1059 /* operate on a kernel entry */
1061 kern_ioctl(struct khash
*k
,
1062 int action
, /* RTM_DELETE, etc */
1065 if (((k
->k_state
& (KS_IF
|KS_PASSIVE
)) == KS_IF
) ||
1066 (k
->k_state
& KS_DEPRE_IF
)) {
1068 * Prevent execution of RTM_DELETE, RTM_ADD or
1069 * RTM_CHANGE of interface routes
1071 trace_act("Blocking execution of %s %s --> %s ",
1072 rtm_type_name(action
),
1073 addrname(k
->k_dst
, k
->k_mask
, 0), naddr_ntoa(k
->k_gate
));
1079 k
->k_state
&= ~KS_DYNAMIC
;
1080 if (k
->k_state
& KS_DELETED
)
1082 k
->k_state
|= KS_DELETED
;
1085 k
->k_state
&= ~KS_DELETED
;
1088 if (k
->k_state
& KS_DELETED
) {
1090 k
->k_state
&= ~KS_DELETED
;
1096 * We should be doing an RTM_CHANGE for a KS_CHANGE, but
1097 * RTM_CHANGE in the kernel is not currently multipath-aware and
1098 * assumes that RTF_GATEWAY implies that the gateway of the route for
1099 * dst has to be changed. Moreover, the only change that in.routed
1100 * wants to implement is a change in the ks_metric (rmx_hopcount)
1101 * which the kernel ignores anyway, so we skip the RTM_CHANGE operation
1104 if (action
!= RTM_CHANGE
) {
1105 rtioctl(action
, k
->k_dst
, k
->k_gate
, k
->k_mask
, k
->k_ifp
,
1106 k
->k_metric
, flags
);
1111 /* add a route the kernel told us about */
1113 rtm_add(struct rt_msghdr
*rtm
,
1114 struct rt_addrinfo
*info
,
1116 boolean_t interf_route
,
1117 struct interface
*ifptr
)
1120 struct interface
*ifp
= ifptr
;
1121 in_addr_t mask
, gate
= 0;
1122 static struct msg_limit msg_no_ifp
;
1124 if (rtm
->rtm_flags
& RTF_HOST
) {
1126 } else if (INFO_MASK(info
) != 0) {
1127 mask
= ntohl(S_ADDR(INFO_MASK(info
)));
1129 writelog(LOG_WARNING
,
1130 "ignore %s without mask", rtm_type_name(rtm
->rtm_type
));
1135 * Find the interface toward the gateway.
1137 if (INFO_GATE(info
) != NULL
)
1138 gate
= S_ADDR(INFO_GATE(info
));
1141 if (INFO_GATE(info
) != NULL
)
1142 ifp
= iflookup(gate
);
1144 msglim(&msg_no_ifp
, gate
,
1145 "route %s --> %s nexthop is not directly connected",
1146 addrname(S_ADDR(INFO_DST(info
)), mask
, 0),
1151 k
= kern_add(S_ADDR(INFO_DST(info
)), mask
, gate
, ifp
);
1153 if (k
->k_state
& KS_NEW
)
1154 k
->k_keep
= now
.tv_sec
+keep
;
1155 if (INFO_GATE(info
) == 0) {
1156 trace_act("note %s without gateway",
1157 rtm_type_name(rtm
->rtm_type
));
1158 k
->k_metric
= HOPCNT_INFINITY
;
1159 } else if (INFO_GATE(info
)->ss_family
!= AF_INET
) {
1160 trace_act("note %s with gateway AF=%d",
1161 rtm_type_name(rtm
->rtm_type
),
1162 INFO_GATE(info
)->ss_family
);
1163 k
->k_metric
= HOPCNT_INFINITY
;
1165 k
->k_gate
= S_ADDR(INFO_GATE(info
));
1166 k
->k_metric
= rtm
->rtm_rmx
.rmx_hopcount
;
1167 if (k
->k_metric
< 0)
1169 else if (k
->k_metric
> HOPCNT_INFINITY
-1)
1170 k
->k_metric
= HOPCNT_INFINITY
-1;
1173 if ((k
->k_state
& KS_NEW
) && interf_route
) {
1174 if (k
->k_gate
!= 0 && findifaddr(k
->k_gate
) == NULL
)
1175 k
->k_state
|= KS_DEPRE_IF
;
1177 k
->k_state
|= KS_IF
;
1180 k
->k_state
&= ~(KS_NEW
| KS_DELETE
| KS_ADD
| KS_CHANGE
| KS_DEL_ADD
|
1181 KS_STATIC
| KS_GATEWAY
| KS_DELETED
| KS_PRIVATE
| KS_CHECK
);
1182 if (rtm
->rtm_flags
& RTF_GATEWAY
)
1183 k
->k_state
|= KS_GATEWAY
;
1184 if (rtm
->rtm_flags
& RTF_STATIC
)
1185 k
->k_state
|= KS_STATIC
;
1186 if (rtm
->rtm_flags
& RTF_PRIVATE
)
1187 k
->k_state
|= KS_PRIVATE
;
1190 if (rtm
->rtm_flags
& (RTF_DYNAMIC
| RTF_MODIFIED
)) {
1191 if (INFO_AUTHOR(info
) != 0 &&
1192 INFO_AUTHOR(info
)->ss_family
== AF_INET
)
1193 ifp
= iflookup(S_ADDR(INFO_AUTHOR(info
)));
1196 if (should_supply(ifp
) && (ifp
== NULL
||
1197 !(ifp
->int_state
& IS_REDIRECT_OK
))) {
1199 * Routers are not supposed to listen to redirects,
1200 * so delete it if it came via an unknown interface
1201 * or the interface does not have special permission.
1203 k
->k_state
&= ~KS_DYNAMIC
;
1204 k
->k_state
|= KS_DELETE
;
1205 LIM_SEC(need_kern
, 0);
1206 trace_act("mark for deletion redirected %s --> %s"
1208 addrname(k
->k_dst
, k
->k_mask
, 0),
1209 naddr_ntoa(k
->k_gate
),
1210 ifp
? ifp
->int_name
: "unknown interface");
1212 k
->k_state
|= KS_DYNAMIC
;
1213 k
->k_redirect_time
= now
.tv_sec
;
1214 trace_act("accept redirected %s --> %s via %s",
1215 addrname(k
->k_dst
, k
->k_mask
, 0),
1216 naddr_ntoa(k
->k_gate
),
1217 ifp
? ifp
->int_name
: "unknown interface");
1223 * If it is not a static route, quit until the next comparison
1224 * between the kernel and daemon tables, when it will be deleted.
1226 if (!(k
->k_state
& KS_STATIC
)) {
1227 if (!(k
->k_state
& (KS_IF
|KS_DEPRE_IF
|KS_FILE
)))
1228 k
->k_state
|= KS_DELETE
;
1229 LIM_SEC(need_kern
, k
->k_keep
);
1234 * Put static routes with real metrics into the daemon table so
1235 * they can be advertised.
1238 kern_check_static(k
, ifp
);
1242 /* deal with packet loss */
1244 rtm_lose(struct rt_msghdr
*rtm
, struct rt_addrinfo
*info
)
1246 struct rt_spare
new, *rts
, *losing_rts
= NULL
;
1247 struct rt_entry
*rt
;
1250 if (INFO_GATE(info
) == NULL
|| INFO_GATE(info
)->ss_family
!= AF_INET
) {
1251 trace_act("ignore %s without gateway",
1252 rtm_type_name(rtm
->rtm_type
));
1257 rt
= rtfind(S_ADDR(INFO_DST(info
)));
1260 for (i
= 0; i
< rt
->rt_num_spares
; i
++) {
1261 rts
= &rt
->rt_spares
[i
];
1262 if (rts
->rts_gate
== S_ADDR(INFO_GATE(info
))) {
1266 if (rts
->rts_gate
!= 0 && rts
->rts_ifp
!= &dummy_ifp
)
1270 if (rt
== NULL
|| losing_rts
== NULL
) {
1271 trace_act("Ignore RTM_LOSING because no route found"
1272 " for %s through %s",
1273 naddr_ntoa(S_ADDR(INFO_DST(info
))),
1274 naddr_ntoa(S_ADDR(INFO_GATE(info
))));
1278 trace_act("Got RTM_LOSING, but no alternatives to gw %s."
1279 " deprecating route to metric 15",
1280 naddr_ntoa(S_ADDR(INFO_GATE(info
))));
1282 new.rts_metric
= HOPCNT_INFINITY
- 1;
1283 rtchange(rt
, rt
->rt_state
, &new, 0);
1286 trace_act("Got RTM_LOSING. Found a route with %d alternates", spares
);
1288 rdisc_age(S_ADDR(INFO_GATE(info
)));
1289 age(S_ADDR(INFO_GATE(info
)));
1294 * Make the gateway slot of an info structure point to something
1295 * useful. If it is not already useful, but it specifies an interface,
1296 * then fill in the sockaddr_in provided and point it there.
/*
 * ssp addresses the gateway slot and sin is caller-provided storage.  The
 * slot's current target is inspected as a sockaddr_dl: its family is
 * tested against AF_INET and AF_LINK, the interface is looked up from
 * sdl_index, and sin is filled with that interface's int_addr and
 * installed as the new target of the slot.
 */
1299 get_info_gate(struct sockaddr_storage
**ssp
, struct sockaddr_in
*sin
)
/* View the slot's current target as a link-level address. */
1301 struct sockaddr_dl
*sdl
= (struct sockaddr_dl
*)*ssp
;
1302 struct interface
*ifp
;
/* Family check: the target is already an IPv4 address. */
1306 if ((sdl
)->sdl_family
== AF_INET
)
/* Family check: the target is not a link-level address. */
1308 if ((sdl
)->sdl_family
!= AF_LINK
)
/* Map the link-level interface index to an interface. */
1311 ifp
= ifwithindex(sdl
->sdl_index
, _B_TRUE
);
/* Build an AF_INET address from the interface's int_addr. */
1315 sin
->sin_addr
.s_addr
= ifp
->int_addr
;
1316 sin
->sin_family
= AF_INET
;
/* Redirect the slot to the caller-provided sockaddr_in. */
1318 *ssp
= (struct sockaddr_storage
*)sin
;
1325 * Clean the kernel table by copying it to the daemon image.
1326 * Eventually the daemon will delete any extra routes.
1334 struct T_optmgmt_req req
;
1338 struct T_optmgmt_ack ack
;
1339 unsigned char space
[64];
1342 struct strbuf cbuf
, dbuf
;
1343 int ipfd
, nroutes
, flags
, r
;
1344 mib2_ipRouteEntry_t routes
[8];
1345 mib2_ipRouteEntry_t
*rp
;
1346 struct rt_msghdr rtm
;
1347 struct rt_addrinfo info
;
1348 struct sockaddr_in sin_dst
;
1349 struct sockaddr_in sin_gate
;
1350 struct sockaddr_in sin_mask
;
1351 struct sockaddr_in sin_author
;
1352 struct interface
*ifp
;
1353 char ifname
[LIFNAMSIZ
+ 1];
1355 for (i
= 0; i
< KHASH_SIZE
; i
++) {
1356 for (k
= khash_bins
[i
]; k
!= NULL
; k
= k
->k_next
) {
1357 if (!(k
->k_state
& (KS_IF
|KS_DEPRE_IF
)))
1358 k
->k_state
|= KS_CHECK
;
1362 ipfd
= open(IP_DEV_NAME
, O_RDWR
);
1364 msglog("open " IP_DEV_NAME
": %s", rip_strerror(errno
));
1368 req
.req
.PRIM_type
= T_OPTMGMT_REQ
;
1369 req
.req
.OPT_offset
= (caddr_t
)&req
.hdr
- (caddr_t
)&req
;
1370 req
.req
.OPT_length
= sizeof (req
.hdr
);
1371 req
.req
.MGMT_flags
= T_CURRENT
;
1373 req
.hdr
.level
= MIB2_IP
;
1377 cbuf
.buf
= (caddr_t
)&req
;
1378 cbuf
.len
= sizeof (req
);
1380 if (putmsg(ipfd
, &cbuf
, NULL
, 0) == -1) {
1381 msglog("T_OPTMGMT_REQ putmsg: %s", rip_strerror(errno
));
1386 cbuf
.buf
= (caddr_t
)&ack
;
1387 cbuf
.maxlen
= sizeof (ack
);
1388 dbuf
.buf
= (caddr_t
)routes
;
1389 dbuf
.maxlen
= sizeof (routes
);
1391 r
= getmsg(ipfd
, &cbuf
, &dbuf
, &flags
);
1393 msglog("T_OPTMGMT_REQ getmsg: %s", rip_strerror(errno
));
1397 if (cbuf
.len
< sizeof (struct T_optmgmt_ack
) ||
1398 ack
.ack
.PRIM_type
!= T_OPTMGMT_ACK
||
1399 ack
.ack
.MGMT_flags
!= T_SUCCESS
||
1400 ack
.ack
.OPT_length
< sizeof (struct opthdr
)) {
1401 msglog("bad T_OPTMGMT response; len=%d prim=%d "
1402 "flags=%d optlen=%d", cbuf
.len
, ack
.ack
.PRIM_type
,
1403 ack
.ack
.MGMT_flags
, ack
.ack
.OPT_length
);
1407 rh
= (struct opthdr
*)((caddr_t
)&ack
+ ack
.ack
.OPT_offset
);
1408 if (rh
->level
== 0 && rh
->name
== 0) {
1411 if (rh
->level
!= MIB2_IP
|| rh
->name
!= MIB2_IP_21
) {
1412 while (r
== MOREDATA
) {
1413 r
= getmsg(ipfd
, NULL
, &dbuf
, &flags
);
1420 (void) memset(&rtm
, 0, sizeof (rtm
));
1421 (void) memset(&info
, 0, sizeof (info
));
1422 (void) memset(&sin_dst
, 0, sizeof (sin_dst
));
1423 (void) memset(&sin_gate
, 0, sizeof (sin_gate
));
1424 (void) memset(&sin_mask
, 0, sizeof (sin_mask
));
1425 (void) memset(&sin_author
, 0, sizeof (sin_author
));
1426 sin_dst
.sin_family
= AF_INET
;
1428 info
.rti_info
[RTAX_DST
] = (struct sockaddr_storage
*)&sin_dst
;
1429 sin_gate
.sin_family
= AF_INET
;
1431 info
.rti_info
[RTAX_GATEWAY
] = (struct sockaddr_storage
*)&sin_gate
;
1432 sin_mask
.sin_family
= AF_INET
;
1434 info
.rti_info
[RTAX_NETMASK
] = (struct sockaddr_storage
*)&sin_mask
;
1435 sin_dst
.sin_family
= AF_INET
;
1437 info
.rti_info
[RTAX_AUTHOR
] = (struct sockaddr_storage
*)&sin_author
;
1440 nroutes
= dbuf
.len
/ sizeof (mib2_ipRouteEntry_t
);
1441 for (rp
= routes
; nroutes
> 0; ++rp
, nroutes
--) {
1444 * Ignore IRE cache, broadcast, and local address
1445 * entries; they're not subject to routing socket
1448 if (rp
->ipRouteInfo
.re_ire_type
&
1449 (IRE_BROADCAST
| IRE_CACHE
| IRE_LOCAL
))
1452 /* ignore multicast and link local addresses */
1453 if (IN_MULTICAST(ntohl(rp
->ipRouteDest
)) ||
1454 IN_LINKLOCAL(ntohl(rp
->ipRouteDest
))) {
1459 #ifdef DEBUG_KERNEL_ROUTE_READ
1460 (void) fprintf(stderr
, "route type %d, ire type %08X, "
1461 "flags %08X: %s", rp
->ipRouteType
,
1462 rp
->ipRouteInfo
.re_ire_type
,
1463 rp
->ipRouteInfo
.re_flags
,
1464 naddr_ntoa(rp
->ipRouteDest
));
1465 (void) fprintf(stderr
, " %s",
1466 naddr_ntoa(rp
->ipRouteMask
));
1467 (void) fprintf(stderr
, " %s\n",
1468 naddr_ntoa(rp
->ipRouteNextHop
));
1471 /* Fake up the needed entries */
1472 rtm
.rtm_flags
= rp
->ipRouteInfo
.re_flags
;
1473 rtm
.rtm_type
= RTM_GET
;
1474 rtm
.rtm_rmx
.rmx_hopcount
= rp
->ipRouteMetric1
;
1476 (void) memset(ifname
, 0, sizeof (ifname
));
1477 if (rp
->ipRouteIfIndex
.o_length
<
1478 sizeof (rp
->ipRouteIfIndex
.o_bytes
))
1479 rp
->ipRouteIfIndex
.o_bytes
[
1480 rp
->ipRouteIfIndex
.o_length
] = '\0';
1481 (void) strncpy(ifname
, rp
->ipRouteIfIndex
.o_bytes
,
1485 * First try to match up on gwkludge entries
1486 * before trying to match ifp by name/nexthop.
1488 if ((ifp
= gwkludge_iflookup(rp
->ipRouteDest
,
1490 ntohl(rp
->ipRouteMask
))) == NULL
) {
1491 ifp
= lifp_iflookup(rp
->ipRouteNextHop
, ifname
);
1494 #ifdef DEBUG_KERNEL_ROUTE_READ
1496 (void) fprintf(stderr
, " found interface"
1497 " %-4s #%-3d ", ifp
->int_name
,
1498 (ifp
->int_phys
!= NULL
) ?
1499 ifp
->int_phys
->phyi_index
: 0);
1500 (void) fprintf(stderr
, "%-15s-->%-15s \n",
1501 naddr_ntoa(ifp
->int_addr
),
1502 addrname(((ifp
->int_if_flags
&
1504 ifp
->int_dstaddr
: htonl(ifp
->int_net
)),
1509 info
.rti_addrs
= RTA_DST
| RTA_GATEWAY
| RTA_NETMASK
;
1510 if (rp
->ipRouteInfo
.re_ire_type
& IRE_HOST_REDIRECT
)
1511 info
.rti_addrs
|= RTA_AUTHOR
;
1512 sin_dst
.sin_addr
.s_addr
= rp
->ipRouteDest
;
1513 sin_gate
.sin_addr
.s_addr
= rp
->ipRouteNextHop
;
1514 sin_mask
.sin_addr
.s_addr
= rp
->ipRouteMask
;
1515 sin_author
.sin_addr
.s_addr
=
1516 rp
->ipRouteInfo
.re_src_addr
;
1519 * Note static routes and interface routes, and also
1520 * preload the image of the kernel table so that
1521 * we can later clean it, as well as avoid making
1522 * unneeded changes. Keep the old kernel routes for a
1523 * few seconds to allow a RIP or router-discovery
1524 * response to be heard.
1526 rtm_add(&rtm
, &info
, MAX_WAITTIME
,
1527 ((rp
->ipRouteInfo
.re_ire_type
&
1528 (IRE_INTERFACE
|IRE_LOOPBACK
)) != 0), ifp
);
1533 r
= getmsg(ipfd
, NULL
, &dbuf
, &flags
);
1539 for (i
= 0; i
< KHASH_SIZE
; i
++) {
1540 for (k
= khash_bins
[i
]; k
!= NULL
; k
= k
->k_next
) {
1543 * KS_DELETED routes have been removed from the
1544 * kernel, but we keep them around for reasons
1545 * stated in del_static(), so we skip the check
1546 * for KS_DELETED routes here.
1548 if ((k
->k_state
& (KS_CHECK
|KS_DELETED
)) == KS_CHECK
) {
1550 if (!(k
->k_state
& KS_DYNAMIC
)) {
1551 writelog(LOG_WARNING
,
1552 "%s --> %s disappeared from kernel",
1553 addrname(k
->k_dst
, k
->k_mask
, 0),
1554 naddr_ntoa(k
->k_gate
));
1556 del_static(k
->k_dst
, k
->k_mask
, k
->k_gate
,
1565 /* Listen to announcements from the kernel */
1570 struct interface
*ifp
;
1571 struct sockaddr_in gate_sin
;
1572 in_addr_t mask
, gate
;
1575 struct rt_msghdr rtm
;
1576 struct sockaddr_storage addrs
[RTA_NUMBITS
];
1578 struct if_msghdr ifm
;
1580 char str
[100], *strp
;
1581 struct rt_addrinfo info
;
1585 cc
= read(rt_sock
, &m
, sizeof (m
));
1587 if (cc
< 0 && errno
!= EWOULDBLOCK
)
1588 LOGERR("read(rt_sock)");
1593 dump_rt_msg("read", &m
.r
.rtm
, cc
);
1595 if (cc
< m
.r
.rtm
.rtm_msglen
) {
1596 msglog("routing message truncated (%d < %d)",
1597 cc
, m
.r
.rtm
.rtm_msglen
);
1600 if (m
.r
.rtm
.rtm_version
!= RTM_VERSION
) {
1601 msglog("bogus routing message version %d",
1602 m
.r
.rtm
.rtm_version
);
1608 if (m
.r
.rtm
.rtm_type
== RTM_IFINFO
||
1609 m
.r
.rtm
.rtm_type
== RTM_NEWADDR
||
1610 m
.r
.rtm
.rtm_type
== RTM_DELADDR
) {
1611 strp
= if_bit_string(m
.ifm
.ifm_flags
, _B_TRUE
);
1614 (void) sprintf(str
, "%#x", m
.ifm
.ifm_flags
);
1616 ifp
= ifwithindex(m
.ifm
.ifm_index
,
1617 m
.r
.rtm
.rtm_type
!= RTM_DELADDR
);
1619 char ifname
[LIFNAMSIZ
], *ifnamep
;
1621 ifnamep
= if_indextoname(m
.ifm
.ifm_index
,
1623 if (ifnamep
== NULL
) {
1624 trace_act("note %s with flags %s"
1625 " for unknown interface index #%d",
1626 rtm_type_name(m
.r
.rtm
.rtm_type
),
1627 strp
, m
.ifm
.ifm_index
);
1629 trace_act("note %s with flags %s"
1630 " for unknown interface %s",
1631 rtm_type_name(m
.r
.rtm
.rtm_type
),
1635 trace_act("note %s with flags %s for %s",
1636 rtm_type_name(m
.r
.rtm
.rtm_type
),
1637 strp
, ifp
->int_name
);
1643 * After being informed of a change to an interface,
1644 * check them all now if the check would otherwise
1645 * be a long time from now, if the interface is
1646 * not known, or if the interface has been turned
1649 if (ifscan_timer
.tv_sec
-now
.tv_sec
>=
1650 CHECK_BAD_INTERVAL
|| ifp
== NULL
||
1651 ((ifp
->int_if_flags
^ m
.ifm
.ifm_flags
) &
1653 ifscan_timer
.tv_sec
= now
.tv_sec
;
1655 } else if (m
.r
.rtm
.rtm_type
== RTM_CHGADDR
||
1656 m
.r
.rtm
.rtm_type
== RTM_FREEADDR
) {
1659 if (m
.r
.rtm
.rtm_index
!= 0)
1660 ifp
= ifwithindex(m
.r
.rtm
.rtm_index
, 1);
1663 (void) strlcpy(str
, rtm_type_name(m
.r
.rtm
.rtm_type
),
1665 strp
= &str
[strlen(str
)];
1666 if (m
.r
.rtm
.rtm_type
<= RTM_CHANGE
)
1667 strp
+= snprintf(strp
, sizeof (str
) - (strp
- str
),
1668 " from pid %d", (int)m
.r
.rtm
.rtm_pid
);
1671 (void) rt_xaddrs(&info
, (struct sockaddr_storage
*)(&m
.r
.rtm
+
1672 1), (char *)&m
+ cc
, m
.r
.rtm
.rtm_addrs
);
1674 if (INFO_DST(&info
) == 0) {
1675 trace_act("ignore %s without dst", str
);
1679 if (INFO_DST(&info
)->ss_family
!= AF_INET
) {
1680 trace_act("ignore %s for AF %d", str
,
1681 INFO_DST(&info
)->ss_family
);
1685 mask
= ((INFO_MASK(&info
) != 0) ?
1686 ntohl(S_ADDR(INFO_MASK(&info
))) :
1687 (m
.r
.rtm
.rtm_flags
& RTF_HOST
) ?
1688 HOST_MASK
: std_mask(S_ADDR(INFO_DST(&info
))));
1690 strp
+= snprintf(strp
, sizeof (str
) - (strp
- str
), ": %s",
1691 addrname(S_ADDR(INFO_DST(&info
)), mask
, 0));
1693 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info
)))) ||
1694 IN_LINKLOCAL(ntohl(S_ADDR(INFO_DST(&info
))))) {
1695 trace_act("ignore multicast/link local %s", str
);
1699 if (m
.r
.rtm
.rtm_flags
& RTF_LLINFO
) {
1700 trace_act("ignore ARP %s", str
);
1704 if (get_info_gate(&INFO_GATE(&info
), &gate_sin
)) {
1705 gate
= S_ADDR(INFO_GATE(&info
));
1706 strp
+= snprintf(strp
, sizeof (str
) - (strp
- str
),
1707 " --> %s", naddr_ntoa(gate
));
1712 if (INFO_AUTHOR(&info
) != 0)
1713 strp
+= snprintf(strp
, sizeof (str
) - (strp
- str
),
1714 " by authority of %s",
1715 saddr_ntoa(INFO_AUTHOR(&info
)));
1717 switch (m
.r
.rtm
.rtm_type
) {
1721 if (m
.r
.rtm
.rtm_errno
!= 0) {
1722 trace_act("ignore %s with \"%s\" error",
1723 str
, rip_strerror(m
.r
.rtm
.rtm_errno
));
1725 trace_act("%s", str
);
1726 rtm_add(&m
.r
.rtm
, &info
, 0,
1727 !(m
.r
.rtm
.rtm_flags
& RTF_GATEWAY
) &&
1728 m
.r
.rtm
.rtm_type
!= RTM_REDIRECT
, ifp
);
1734 if (m
.r
.rtm
.rtm_errno
!= 0 &&
1735 m
.r
.rtm
.rtm_errno
!= ESRCH
) {
1736 trace_act("ignore %s with \"%s\" error",
1737 str
, rip_strerror(m
.r
.rtm
.rtm_errno
));
1739 trace_act("%s", str
);
1740 del_static(S_ADDR(INFO_DST(&info
)), mask
,
1746 trace_act("%s", str
);
1747 rtm_lose(&m
.r
.rtm
, &info
);
1751 trace_act("ignore %s", str
);
1759 * Disassemble a routing message. The result is an array of pointers
1760 * to sockaddr_storage structures stored in the info argument.
1762 * ss is a pointer to the beginning of the data following the
1763 * rt_msghdr contained in the routing socket message, which consists
1764 * of a string of concatenated sockaddr structures of different types.
1766 * Extended attributes can be appended at the end of the list.
1769 rt_xaddrs(struct rt_addrinfo
*info
,
1770 struct sockaddr_storage
*ss
,
1778 static int prev_complaints
;
1781 #define XBAD_SHORT 0x2
1782 #define XBAD_LONG 0x4
1784 (void) memset(info
, 0, sizeof (*info
));
1785 info
->rti_addrs
= addrs
;
1787 for (i
= 0, abit
= 1; i
< RTAX_MAX
&& (char *)ss
< lim
;
1789 if ((addrs
& abit
) == 0)
1791 info
->rti_info
[i
] = ss
;
1792 /* Horrible interface here */
1793 switch (ss
->ss_family
) {
1796 ss
= (struct sockaddr_storage
*)(
1797 (struct sockaddr_un
*)ss
+ 1);
1801 ss
= (struct sockaddr_storage
*)(
1802 (struct sockaddr_in
*)ss
+ 1);
1806 ss
= (struct sockaddr_storage
*)(
1807 (struct sockaddr_dl
*)ss
+ 1);
1811 ss
= (struct sockaddr_storage
*)(
1812 (struct sockaddr_in6
*)ss
+ 1);
1815 if (!(prev_complaints
& XBAD_AF
))
1816 writelog(LOG_WARNING
,
1817 "unknown address family %d "
1818 "encountered", ss
->ss_family
);
1819 if (complaints
& XBAD_AF
)
1822 ss
= (struct sockaddr_storage
*)(
1823 (struct sockaddr
*)ss
+ 1);
1824 complaints
|= XBAD_AF
;
1825 info
->rti_addrs
&= abit
- 1;
1826 addrs
= info
->rti_addrs
;
1830 if ((char *)ss
> lim
) {
1831 if (!(prev_complaints
& XBAD_SHORT
))
1832 msglog("sockaddr %d too short by %d "
1833 "bytes", i
+ 1, (char *)ss
- lim
);
1834 complaints
|= XBAD_SHORT
;
1835 info
->rti_info
[i
] = NULL
;
1836 info
->rti_addrs
&= abit
- 1;
1842 while (((char *)ss
+ sizeof (rtm_ext_t
)) <= lim
) {
1846 /* LINTED: alignment */
1847 tp
= (rtm_ext_t
*)ss
;
1848 nxt
= (char *)(tp
+ 1) + tp
->rtmex_len
;
1850 if (!IS_P2ALIGNED(tp
->rtmex_len
, sizeof (uint32_t)) ||
1855 /* LINTED: alignment */
1856 ss
= (struct sockaddr_storage
*)nxt
;
1859 if ((char *)ss
!= lim
) {
1860 if ((char *)ss
> lim
) {
1861 if (!(prev_complaints
& XBAD_SHORT
))
1862 msglog("routing message too short by %d bytes",
1864 complaints
|= XBAD_SHORT
;
1865 } else if (!(prev_complaints
& XBAD_LONG
)) {
1866 msglog("%d bytes of routing message left over",
1868 complaints
|= XBAD_LONG
;
1873 prev_complaints
= complaints
;
1877 /* after aggregating, note routes that belong in the kernel */
1879 kern_out(struct ag_info
*ag
)
1882 struct interface
*ifp
;
1887 * Do not install bad routes if they are not already present.
1888 * This includes routes that had RS_NET_SYN for interfaces that
1891 if (ag
->ag_metric
== HOPCNT_INFINITY
) {
1892 k
= kern_find(htonl(ag
->ag_dst_h
), ag
->ag_mask
,
1893 ag
->ag_nhop
, ag
->ag_ifp
, NULL
);
1897 k
= kern_add(htonl(ag
->ag_dst_h
), ag
->ag_mask
, ag
->ag_nhop
,
1901 if (k
->k_state
& KS_NEW
) {
1902 /* will need to add new entry to the kernel table */
1903 k
->k_state
= KS_ADD
;
1904 if (ag
->ag_state
& AGS_GATEWAY
)
1905 k
->k_state
|= KS_GATEWAY
;
1906 if (ag
->ag_state
& AGS_IF
)
1907 k
->k_state
|= KS_IF
;
1908 if (ag
->ag_state
& AGS_PASSIVE
)
1909 k
->k_state
|= KS_PASSIVE
;
1910 if (ag
->ag_state
& AGS_FILE
)
1911 k
->k_state
|= KS_FILE
;
1912 k
->k_gate
= ag
->ag_nhop
;
1914 k
->k_metric
= ag
->ag_metric
;
1918 if ((k
->k_state
& (KS_STATIC
|KS_DEPRE_IF
)) ||
1919 ((k
->k_state
& (KS_IF
|KS_PASSIVE
)) == KS_IF
)) {
1923 /* modify existing kernel entry if necessary */
1924 if (k
->k_gate
== ag
->ag_nhop
&& k
->k_ifp
== ag
->ag_ifp
&&
1925 k
->k_metric
!= ag
->ag_metric
) {
1927 * Must delete bad interface routes etc.
1930 if (k
->k_metric
== HOPCNT_INFINITY
)
1931 k
->k_state
|= KS_DEL_ADD
;
1932 k
->k_gate
= ag
->ag_nhop
;
1933 k
->k_metric
= ag
->ag_metric
;
1934 k
->k_state
|= KS_CHANGE
;
1938 * If the daemon thinks the route should exist, forget
1939 * about any redirections.
1940 * If the daemon thinks the route should exist, eventually
1941 * override manual intervention by the operator.
1943 if ((k
->k_state
& (KS_DYNAMIC
| KS_DELETED
)) != 0) {
1944 k
->k_state
&= ~KS_DYNAMIC
;
1945 k
->k_state
|= (KS_ADD
| KS_DEL_ADD
);
1948 if ((k
->k_state
& KS_GATEWAY
) && !(ag
->ag_state
& AGS_GATEWAY
)) {
1949 k
->k_state
&= ~KS_GATEWAY
;
1950 k
->k_state
|= (KS_ADD
| KS_DEL_ADD
);
1951 } else if (!(k
->k_state
& KS_GATEWAY
) && (ag
->ag_state
& AGS_GATEWAY
)) {
1952 k
->k_state
|= KS_GATEWAY
;
1953 k
->k_state
|= (KS_ADD
| KS_DEL_ADD
);
1957 * Deleting-and-adding is necessary to change aspects of a route.
1958 * Just delete instead of deleting and then adding a bad route.
1959 * Otherwise, we want to keep the route in the kernel.
1961 if (k
->k_metric
== HOPCNT_INFINITY
&& (k
->k_state
& KS_DEL_ADD
))
1962 k
->k_state
|= KS_DELETE
;
1964 k
->k_state
&= ~KS_DELETE
;
1969 * Update our image of the kernel forwarding table using the given
1970 * route from our internal routing table.
1975 walk_kern(struct radix_node
*rn
, void *argp
)
1977 #define RT ((struct rt_entry *)rn)
1978 uint8_t metric
, pref
;
1981 struct rt_spare
*rts
;
1983 /* Do not install synthetic routes */
1984 if (RT
->rt_state
& RS_NET_SYN
)
1988 * Do not install static routes here. Only
1989 * read_rt->rtm_add->kern_add should install those
1991 if ((RT
->rt_state
& RS_STATIC
) &&
1992 (RT
->rt_spares
[0].rts_origin
!= RO_FILE
))
1995 /* Do not clobber kernel if this is a route for a dead interface */
1996 if (RT
->rt_state
& RS_BADIF
)
1999 if (!(RT
->rt_state
& RS_IF
)) {
2000 /* This is an ordinary route, not for an interface. */
2003 * aggregate, ordinary good routes without regard to
2007 ags
|= (AGS_GATEWAY
| AGS_SUPPRESS
| AGS_AGGREGATE
);
2010 * Do not install host routes directly to hosts, to avoid
2011 * interfering with ARP entries in the kernel table.
2013 if (RT_ISHOST(RT
) && ntohl(RT
->rt_dst
) == RT
->rt_gate
)
2018 * This is an interface route.
2019 * Do not install routes for "external" remote interfaces.
2021 if (RT
->rt_ifp
!= NULL
&& (RT
->rt_ifp
->int_state
& IS_EXTERNAL
))
2024 /* Interfaces should override received routes. */
2026 ags
|= (AGS_IF
| AGS_CORS_GATE
);
2027 if (RT
->rt_ifp
!= NULL
&&
2028 !(RT
->rt_ifp
->int_if_flags
& IFF_LOOPBACK
) &&
2029 (RT
->rt_ifp
->int_state
& (IS_PASSIVE
|IS_ALIAS
)) ==
2035 * If it is not an interface, or an alias for an interface,
2036 * it must be a "gateway."
2038 * If it is a "remote" interface, it is also a "gateway" to
2039 * the kernel if it is not an alias.
2041 if (RT
->rt_ifp
== NULL
|| (RT
->rt_ifp
->int_state
& IS_REMOTE
)) {
2043 ags
|= (AGS_GATEWAY
| AGS_SUPPRESS
);
2046 * Do not aggregate IS_PASSIVE routes.
2048 if (!(RT
->rt_ifp
->int_state
& IS_PASSIVE
))
2049 ags
|= AGS_AGGREGATE
;
2053 metric
= RT
->rt_metric
;
2054 if (metric
== HOPCNT_INFINITY
) {
2055 /* If the route is dead, try hard to aggregate. */
2056 pref
= HOPCNT_INFINITY
;
2057 ags
|= (AGS_FINE_GATE
| AGS_SUPPRESS
);
2058 ags
&= ~(AGS_IF
| AGS_CORS_GATE
);
2062 * dump all routes that have the same metric as rt_spares[0]
2063 * into the kern_table, to be added to the kernel.
2065 for (i
= 0; i
< RT
->rt_num_spares
; i
++) {
2066 rts
= &RT
->rt_spares
[i
];
2068 /* Do not install external routes */
2069 if (rts
->rts_flags
& RTS_EXTERNAL
)
2072 if (rts
->rts_metric
== metric
) {
2073 ag_check(RT
->rt_dst
, RT
->rt_mask
,
2074 rts
->rts_router
, rts
->rts_ifp
, rts
->rts_gate
,
2076 (rts
->rts_origin
& RO_FILE
) ? (ags
|AGS_FILE
) : ags
,
2085 /* Update the kernel table to match the daemon table. */
2090 struct khash
*k
, *pk
, *knext
;
2093 need_kern
= age_timer
;
2095 /* Walk daemon table, updating the copy of the kernel table. */
2096 (void) rn_walktree(rhead
, walk_kern
, NULL
);
2097 ag_flush(0, 0, kern_out
);
2099 for (i
= 0; i
< KHASH_SIZE
; i
++) {
2101 for (k
= khash_bins
[i
]; k
!= NULL
; k
= knext
) {
2104 /* Do not touch local interface routes */
2105 if ((k
->k_state
& KS_DEPRE_IF
) ||
2106 (k
->k_state
& (KS_IF
|KS_PASSIVE
)) == KS_IF
) {
2111 /* Do not touch static routes */
2112 if (k
->k_state
& KS_STATIC
) {
2113 kern_check_static(k
, 0);
2118 /* check hold on routes deleted by the operator */
2119 if (k
->k_keep
> now
.tv_sec
) {
2120 /* ensure we check when the hold is over */
2121 LIM_SEC(need_kern
, k
->k_keep
);
2126 if ((k
->k_state
& KS_DELETE
) &&
2127 !(k
->k_state
& KS_DYNAMIC
)) {
2128 if ((k
->k_dst
== RIP_DEFAULT
) &&
2129 (k
->k_ifp
!= NULL
) &&
2130 (kern_alternate(RIP_DEFAULT
,
2131 k
->k_mask
, k
->k_gate
, k
->k_ifp
,
2133 rdisc_restore(k
->k_ifp
);
2134 kern_ioctl(k
, RTM_DELETE
, 0);
2138 khash_bins
[i
] = knext
;
2143 if (k
->k_state
& KS_DEL_ADD
)
2144 kern_ioctl(k
, RTM_DELETE
, 0);
2146 if (k
->k_state
& KS_ADD
) {
2147 if ((k
->k_dst
== RIP_DEFAULT
) &&
2149 rdisc_suppress(k
->k_ifp
);
2150 kern_ioctl(k
, RTM_ADD
,
2151 ((0 != (k
->k_state
& (KS_GATEWAY
|
2152 KS_DYNAMIC
))) ? RTF_GATEWAY
: 0));
2153 } else if (k
->k_state
& KS_CHANGE
) {
2154 kern_ioctl(k
, RTM_CHANGE
,
2155 ((0 != (k
->k_state
& (KS_GATEWAY
|
2156 KS_DYNAMIC
))) ? RTF_GATEWAY
: 0));
2158 k
->k_state
&= ~(KS_ADD
|KS_CHANGE
|KS_DEL_ADD
);
2161 * Mark this route to be deleted in the next cycle.
2162 * This deletes routes that disappear from the
2163 * daemon table, since the normal aging code
2164 * will clear the bit for routes that have not
2165 * disappeared from the daemon table.
2167 k
->k_state
|= KS_DELETE
;
2174 /* Delete a static route in the image of the kernel table. */
2176 del_static(in_addr_t dst
, in_addr_t mask
, in_addr_t gate
,
2177 struct interface
*ifp
, int gone
)
2180 struct rt_entry
*rt
;
2183 * Just mark it in the table to be deleted next time the kernel
2185 * If it has already been deleted, mark it as such, and set its
2186 * keep-timer so that it will not be deleted again for a while.
2187 * This lets the operator delete a route added by the daemon
2188 * and add a replacement.
2190 k
= kern_find(dst
, mask
, gate
, ifp
, NULL
);
2191 if (k
!= NULL
&& (gate
== 0 || k
->k_gate
== gate
)) {
2192 k
->k_state
&= ~(KS_STATIC
| KS_DYNAMIC
| KS_CHECK
);
2193 k
->k_state
|= KS_DELETE
;
2195 k
->k_state
|= KS_DELETED
;
2196 k
->k_keep
= now
.tv_sec
+ K_KEEP_LIM
;
2200 rt
= rtget(dst
, mask
);
2201 if (rt
!= NULL
&& (rt
->rt_state
& RS_STATIC
))
2207 * Delete all routes generated from ICMP Redirects that use a given gateway,
2208 * as well as old redirected routes.
2211 del_redirects(in_addr_t bad_gate
, time_t old
)
2215 boolean_t dosupply
= should_supply(NULL
);
2217 for (i
= 0; i
< KHASH_SIZE
; i
++) {
2218 for (k
= khash_bins
[i
]; k
!= NULL
; k
= k
->k_next
) {
2219 if (!(k
->k_state
& KS_DYNAMIC
) ||
2220 (k
->k_state
& (KS_STATIC
|KS_IF
|KS_DEPRE_IF
)))
2223 if (k
->k_gate
!= bad_gate
&& k
->k_redirect_time
> old
&&
2227 k
->k_state
|= KS_DELETE
;
2228 k
->k_state
&= ~KS_DYNAMIC
;
2229 need_kern
.tv_sec
= now
.tv_sec
;
2230 trace_act("mark redirected %s --> %s for deletion",
2231 addrname(k
->k_dst
, k
->k_mask
, 0),
2232 naddr_ntoa(k
->k_gate
));
2237 /* Start the daemon tables. */
2244 /* Initialize the radix trees */
2246 (void) rn_inithead((void**)&rhead
, 32);
2248 /* mark all of the slots in the table free */
2249 ag_avail
= ag_slots
;
2250 for (ag
= ag_slots
, i
= 1; i
< NUM_AG_SLOTS
; i
++) {
2257 static struct sockaddr_in dst_sock
= {AF_INET
};
2258 static struct sockaddr_in mask_sock
= {AF_INET
};
/*
 * Raise the need_flash flag and push no_flash.tv_sec out to
 * now.tv_sec + MIN_WAITTIME.
 * NOTE(review): this listing omits source lines around these statements
 * (2263-2264, 2266, 2269, 2271 onward), so any guard enclosing the two
 * assignments is not visible here -- confirm against the full source.
 */
2262 set_need_flash(void)
2265 need_flash
= _B_TRUE
;
2267 * Do not send the flash update immediately. Wait a little
2268 * while to hear from other routers.
2270 no_flash
.tv_sec
= now
.tv_sec
+ MIN_WAITTIME
;
2275 /* Get a particular routing table entry */
/*
 * dst is stored into the shared dst_sock unchanged; mask is passed
 * through htonl() before being stored into mask_sock, and is compared
 * unconverted against rt_mask afterwards.
 * NOTE(review): the statements following the final test (the return
 * paths) are not present in this listing.
 */
2277 rtget(in_addr_t dst
, in_addr_t mask
)
2279 struct rt_entry
*rt
;
/* Load the file-scope lookup keys; they are reused by other routines. */
2281 dst_sock
.sin_addr
.s_addr
= dst
;
2282 mask_sock
.sin_addr
.s_addr
= htonl(mask
);
/* Look up the (address, mask) pair in the radix tree head rhead. */
2283 rt
= (struct rt_entry
*)rhead
->rnh_lookup(&dst_sock
, &mask_sock
, rhead
);
/* Filter: no result, or a result whose dst/mask differ from the request. */
2284 if (rt
== NULL
|| rt
->rt_dst
!= dst
|| rt
->rt_mask
!= mask
)
2291 /* Find a route to dst as the kernel would. */
/*
 * Stores dst into the file-scope dst_sock and returns whatever
 * rhead->rnh_matchaddr() yields for it, cast to struct rt_entry *.
 * No mask is supplied to the lookup.
 */
2293 rtfind(in_addr_t dst
)
2295 dst_sock
.sin_addr
.s_addr
= dst
;
2296 return ((struct rt_entry
*)rhead
->rnh_matchaddr(&dst_sock
, rhead
));
2299 /* add a route to the table */
2301 rtadd(in_addr_t dst
,
2303 uint16_t state
, /* rt_state for the entry */
2304 struct rt_spare
*new)
2306 struct rt_entry
*rt
;
2309 struct rt_spare
*rts
;
2311 /* This is the only function that increments total_routes. */
2312 if (total_routes
== MAX_ROUTES
) {
2313 msglog("have maximum (%d) routes", total_routes
);
2317 rt
= rtmalloc(sizeof (*rt
), "rtadd");
2318 (void) memset(rt
, 0, sizeof (*rt
));
2319 rt
->rt_spares
= rtmalloc(SPARE_INC
* sizeof (struct rt_spare
),
2321 rt
->rt_num_spares
= SPARE_INC
;
2322 (void) memset(rt
->rt_spares
, 0, SPARE_INC
* sizeof (struct rt_spare
));
2323 for (rts
= rt
->rt_spares
, i
= rt
->rt_num_spares
; i
!= 0; i
--, rts
++)
2324 rts
->rts_metric
= HOPCNT_INFINITY
;
2326 rt
->rt_nodes
->rn_key
= (uint8_t *)&rt
->rt_dst_sock
;
2328 rt
->rt_dst_sock
.sin_family
= AF_INET
;
2329 if (mask
!= HOST_MASK
) {
2330 smask
= std_mask(dst
);
2331 if ((smask
& ~mask
) == 0 && mask
> smask
)
2334 mask_sock
.sin_addr
.s_addr
= htonl(mask
);
2336 rt
->rt_spares
[0] = *new;
2337 rt
->rt_state
= state
;
2338 rt
->rt_time
= now
.tv_sec
;
2339 rt
->rt_poison_metric
= HOPCNT_INFINITY
;
2340 rt
->rt_seqno
= update_seqno
;
2343 trace_add_del("Add", rt
);
2345 need_kern
.tv_sec
= now
.tv_sec
;
2348 if (NULL
== rhead
->rnh_addaddr(&rt
->rt_dst_sock
, &mask_sock
, rhead
,
2350 msglog("rnh_addaddr() failed for %s mask=%s",
2351 naddr_ntoa(dst
), naddr_ntoa(htonl(mask
)));
2359 /* notice a changed route */
2361 rtchange(struct rt_entry
*rt
,
2362 uint16_t state
, /* new state bits */
2363 struct rt_spare
*new,
2366 if (rt
->rt_metric
!= new->rts_metric
) {
2368 * Fix the kernel immediately if it seems the route
2369 * has gone bad, since there may be a working route that
2370 * aggregates this route.
2372 if (new->rts_metric
== HOPCNT_INFINITY
) {
2373 need_kern
.tv_sec
= now
.tv_sec
;
2374 if (new->rts_time
>= now
.tv_sec
- EXPIRE_TIME
)
2375 new->rts_time
= now
.tv_sec
- EXPIRE_TIME
;
2377 rt
->rt_seqno
= update_seqno
;
2381 if (rt
->rt_gate
!= new->rts_gate
) {
2382 need_kern
.tv_sec
= now
.tv_sec
;
2383 rt
->rt_seqno
= update_seqno
;
2387 state
|= (rt
->rt_state
& RS_SUBNET
);
2389 /* Keep various things from deciding ageless routes are stale. */
2390 if (!AGE_RT(state
, rt
->rt_spares
[0].rts_origin
, new->rts_ifp
))
2391 new->rts_time
= now
.tv_sec
;
2394 trace_change(rt
, state
, new,
2395 label
? label
: "Chg ");
2397 rt
->rt_state
= state
;
2399 * If the interface state of the new primary route is good,
2400 * turn off RS_BADIF flag
2402 if ((rt
->rt_state
& RS_BADIF
) &&
2403 IS_IFF_UP(new->rts_ifp
->int_if_flags
) &&
2404 !(new->rts_ifp
->int_state
& (IS_BROKE
| IS_SICK
)))
2405 rt
->rt_state
&= ~(RS_BADIF
);
2407 rt
->rt_spares
[0] = *new;
2411 /* check for a better route among the spares */
/*
 * Scans the spare slots of rt, skipping slot 0.  Slot 1 is the initial
 * candidate (rts); rts1 then walks slots 2 .. rt_num_spares-1, each one
 * being compared against the candidate with BETTER_LINK().
 * NOTE(review): the statement executed when BETTER_LINK() is true and
 * the return statement are not present in this listing.
 */
2412 static struct rt_spare
*
2413 rts_better(struct rt_entry
*rt
)
2415 struct rt_spare
*rts
, *rts1
;
2418 /* find the best alternative among the spares */
2419 rts
= rt
->rt_spares
+1;
/* i counts down from rt_num_spares while it stays above 2. */
2420 for (i
= rt
->rt_num_spares
, rts1
= rts
+1; i
> 2; i
--, rts1
++) {
2421 if (BETTER_LINK(rt
, rts1
, rts
))
2429 /* switch to a backup route */
2431 rtswitch(struct rt_entry
*rt
,
2432 struct rt_spare
*rts
)
2434 struct rt_spare swap
;
2437 /* Do not change permanent routes */
2438 if (0 != (rt
->rt_state
& (RS_MHOME
| RS_STATIC
|
2439 RS_NET_SYN
| RS_IF
)))
2442 /* find the best alternative among the spares */
2444 rts
= rts_better(rt
);
2446 /* Do not bother if it is not worthwhile. */
2447 if (!BETTER_LINK(rt
, rts
, rt
->rt_spares
))
2450 swap
= rt
->rt_spares
[0];
2451 (void) snprintf(label
, sizeof (label
), "Use #%d",
2452 (int)(rts
- rt
->rt_spares
));
2453 rtchange(rt
, rt
->rt_state
& ~(RS_NET_SYN
), rts
, label
);
2455 if (swap
.rts_metric
== HOPCNT_INFINITY
) {
2465 rtdelete(struct rt_entry
*rt
)
2467 struct rt_entry
*deleted_rt
;
2468 struct rt_spare
*rts
;
2470 in_addr_t gate
= rt
->rt_gate
; /* for debugging */
2473 trace_add_del("Del", rt
);
2475 for (i
= 0; i
< rt
->rt_num_spares
; i
++) {
2476 rts
= &rt
->rt_spares
[i
];
2477 rts_delete(rt
, rts
);
2480 dst_sock
.sin_addr
.s_addr
= rt
->rt_dst
;
2481 mask_sock
.sin_addr
.s_addr
= htonl(rt
->rt_mask
);
2482 if (rt
!= (deleted_rt
=
2483 ((struct rt_entry
*)rhead
->rnh_deladdr(&dst_sock
, &mask_sock
,
2485 msglog("rnh_deladdr(%s) failed; found rt 0x%lx",
2486 rtname(rt
->rt_dst
, rt
->rt_mask
, gate
), deleted_rt
);
2490 free(rt
->rt_spares
);
2493 if (dst_sock
.sin_addr
.s_addr
== RIP_DEFAULT
) {
2495 * we just deleted the default route. Trigger rdisc_sort
2496 * so that we can recover from any rdisc information that
2499 rdisc_timer
.tv_sec
= 0;
/*
 * Handles removal of one spare slot (rts) of route rt: traces the slot
 * against rts_empty, then looks up the kernel-image entry matching the
 * route's dst/mask and the spare's gate/ifp.  When the visible part of
 * the condition holds, the entry is flagged KS_DELETE and
 * need_kern.tv_sec is set to the current time.
 * NOTE(review): the first line of the if condition (listing line 2511)
 * and everything after line 2515 are not present in this listing.
 */
2504 rts_delete(struct rt_entry
*rt
, struct rt_spare
*rts
)
2508 trace_upslot(rt
, rts
, &rts_empty
);
2509 k
= kern_find(rt
->rt_dst
, rt
->rt_mask
,
2510 rts
->rts_gate
, rts
->rts_ifp
, NULL
);
/* Skip entries with KS_DEPRE_IF set, or with KS_IF set and KS_PASSIVE clear. */
2512 !(k
->k_state
& KS_DEPRE_IF
) &&
2513 ((k
->k_state
& (KS_IF
|KS_PASSIVE
)) != KS_IF
)) {
2514 k
->k_state
|= KS_DELETE
;
2515 need_kern
.tv_sec
= now
.tv_sec
;
2522 * Get rid of a bad route, and try to switch to a replacement.
2523 * If the route has gone bad because of a bad interface,
2524 * the information about the dead interface is available in badifp
2525 * for the purpose of sanity checks, if_flags checks etc.
2528 rtbad(struct rt_entry
*rt
, struct interface
*badifp
)
2530 struct rt_spare
new;
2534 if (badifp
== NULL
|| (rt
->rt_spares
[0].rts_ifp
== badifp
)) {
2535 /* Poison the route */
2536 new = rt
->rt_spares
[0];
2537 new.rts_metric
= HOPCNT_INFINITY
;
2538 rt_state
= rt
->rt_state
& ~(RS_IF
| RS_LOCAL
| RS_STATIC
);
2541 if (badifp
!= NULL
) {
2543 * Don't mark the rtentry bad unless the ifp for the primary
2544 * route is the bad ifp
2546 if (rt
->rt_spares
[0].rts_ifp
!= badifp
)
2549 * badifp has just gone bad. We want to keep this
2550 * rt_entry around so that we tell our rip-neighbors
2551 * about the bad route, but we can't do anything
2552 * to the kernel itself, so mark it as RS_BADIF
2554 trace_misc("rtbad:Setting RS_BADIF (%s)", badifp
->int_name
);
2555 rt_state
|= RS_BADIF
;
2556 new.rts_ifp
= &dummy_ifp
;
2558 rtchange(rt
, rt_state
, &new, 0);
2564 * Junk a RS_NET_SYN or RS_LOCAL route,
2565 * unless it is needed by another interface.
2568 rtbad_sub(struct rt_entry
*rt
, struct interface
*badifp
)
2570 struct interface
*ifp
, *ifp1
;
2571 struct intnet
*intnetp
;
2578 if (rt
->rt_state
& RS_LOCAL
) {
2580 * Is this the route through loopback for the interface?
2581 * If so, see if it is used by any other interfaces, such
2582 * as a point-to-point interface with the same local address.
2584 for (ifp
= ifnet
; ifp
!= NULL
; ifp
= ifp
->int_next
) {
2585 /* Retain it if another interface needs it. */
2586 if (ifp
->int_addr
== rt
->rt_ifp
->int_addr
) {
2595 if (!(state
& RS_LOCAL
)) {
2597 * Retain RIPv1 logical network route if there is another
2598 * interface that justifies it.
2600 if (rt
->rt_state
& RS_NET_SYN
) {
2601 for (ifp
= ifnet
; ifp
!= NULL
; ifp
= ifp
->int_next
) {
2602 if ((ifp
->int_state
& IS_NEED_NET_SYN
) &&
2603 rt
->rt_mask
== ifp
->int_std_mask
&&
2604 rt
->rt_dst
== ifp
->int_std_addr
) {
2605 state
|= RS_NET_SYN
;
2612 /* or if there is an authority route that needs it. */
2613 for (intnetp
= intnets
; intnetp
!= NULL
;
2614 intnetp
= intnetp
->intnet_next
) {
2615 if (intnetp
->intnet_addr
== rt
->rt_dst
&&
2616 intnetp
->intnet_mask
== rt
->rt_mask
) {
2617 state
|= (RS_NET_SYN
| RS_NET_INT
);
2623 if (ifp1
!= NULL
|| (state
& RS_NET_SYN
)) {
2624 struct rt_spare
new = rt
->rt_spares
[0];
2626 rtchange(rt
, ((rt
->rt_state
& ~(RS_NET_SYN
|RS_LOCAL
)) | state
),
2634 * Called while walking the table looking for sick interfaces
2635 * or after a time change.
2638 walk_bad(struct radix_node
*rn
,
2641 #define RT ((struct rt_entry *)rn)
2642 struct rt_spare
*rts
;
2645 /* fix any spare routes through the interface */
2646 for (i
= 1; i
< RT
->rt_num_spares
; i
++) {
2647 rts
= &((struct rt_entry
*)rn
)->rt_spares
[i
];
2649 if (rts
->rts_metric
< HOPCNT_INFINITY
&&
2650 (rts
->rts_ifp
== NULL
||
2651 (rts
->rts_ifp
->int_state
& IS_BROKE
)))
2652 rts_delete(RT
, rts
);
2654 if (rts
->rts_origin
!= RO_NONE
)
2660 * Deal with the main route
2661 * finished if it has been handled before or if its interface is ok
2663 if (RT
->rt_ifp
== NULL
|| !(RT
->rt_ifp
->int_state
& IS_BROKE
))
2666 /* Bad routes for other than interfaces are easy. */
2667 if (!(RT
->rt_state
& (RS_IF
| RS_NET_SYN
| RS_LOCAL
))) {
2669 RT
->rt_spares
[0].rts_metric
= HOPCNT_INFINITY
;
2672 rtbad(RT
, (struct interface
*)argp
);
2677 rtbad_sub(RT
, (struct interface
*)argp
);
2683 * Called while walking the table to replace a duplicate interface
2687 walk_rewire(struct radix_node
*rn
, void *argp
)
2689 struct rt_entry
*RT
= (struct rt_entry
*)rn
;
2690 struct rewire_data
*wire
= (struct rewire_data
*)argp
;
2691 struct rt_spare
*rts
;
2694 /* fix any spare routes through the interface */
2695 rts
= RT
->rt_spares
;
2696 for (i
= RT
->rt_num_spares
; i
> 0; i
--, rts
++) {
2697 if (rts
->rts_ifp
== wire
->if_old
) {
2698 rts
->rts_ifp
= wire
->if_new
;
2699 if ((RT
->rt_dst
== RIP_DEFAULT
) &&
2700 (wire
->if_old
->int_state
& IS_SUPPRESS_RDISC
))
2701 rdisc_suppress(rts
->rts_ifp
);
2702 if ((rts
->rts_metric
+= wire
->metric_delta
) >
2704 rts
->rts_metric
= HOPCNT_INFINITY
;
2707 * If the main route is getting a worse metric,
2708 * then it may be time to switch to a backup.
2710 if (i
== RT
->rt_num_spares
&& wire
->metric_delta
> 0) {
2719 /* Check the age of an individual route. */
2721 walk_age(struct radix_node
*rn
, void *argp
)
2723 #define RT ((struct rt_entry *)rn)
2724 struct interface
*ifp
;
2725 struct rt_spare
*rts
;
2727 in_addr_t age_bad_gate
= *(in_addr_t
*)argp
;
2731 * age all of the spare routes, including the primary route
2734 rts
= RT
->rt_spares
;
2735 for (i
= RT
->rt_num_spares
; i
!= 0; i
--, rts
++) {
2738 if (i
== RT
->rt_num_spares
) {
2739 if (!AGE_RT(RT
->rt_state
, rts
->rts_origin
, ifp
)) {
2741 * Keep various things from deciding ageless
2744 rts
->rts_time
= now
.tv_sec
;
2748 /* forget RIP routes after RIP has been turned off. */
2750 rts
->rts_time
= now_stale
+ 1;
2754 /* age failing routes */
2755 if (age_bad_gate
== rts
->rts_gate
&&
2756 rts
->rts_time
>= now_stale
) {
2757 rts
->rts_time
-= SUPPLY_INTERVAL
;
2760 /* trash the spare routes when they go bad */
2761 if (rts
->rts_origin
== RO_RIP
&&
2763 (rts
->rts_metric
< HOPCNT_INFINITY
&&
2764 now_garbage
> rts
->rts_time
)) &&
2765 i
!= RT
->rt_num_spares
) {
2766 rts_delete(RT
, rts
);
2771 /* finished if the active route is still fresh */
2772 if (now_stale
<= RT
->rt_time
)
2775 /* try to switch to an alternative */
2778 /* Delete a dead route after it has been publically mourned. */
2779 if (now_garbage
> RT
->rt_time
) {
2784 /* Start poisoning a bad route before deleting it. */
2785 if (now
.tv_sec
- RT
->rt_time
> EXPIRE_TIME
) {
2786 struct rt_spare
new = RT
->rt_spares
[0];
2788 new.rts_metric
= HOPCNT_INFINITY
;
2789 rtchange(RT
, RT
->rt_state
, &new, 0);
2795 /* Watch for dead routes and interfaces. */
2797 age(in_addr_t bad_gate
)
2799 struct interface
*ifp
;
2803 * If not listening to RIP, there is no need to age the routes in
2806 age_timer
.tv_sec
= (now
.tv_sec
2807 + ((rip_sock
< 0) ? NEVER
: SUPPLY_INTERVAL
));
2810 * Check for dead IS_REMOTE interfaces by timing their
2813 for (ifp
= ifnet
; ifp
; ifp
= ifp
->int_next
) {
2814 if (!(ifp
->int_state
& IS_REMOTE
))
2817 /* ignore unreachable remote interfaces */
2818 if (!check_remote(ifp
))
2821 /* Restore remote interface that has become reachable */
2822 if (ifp
->int_state
& IS_BROKE
)
2823 if_ok(ifp
, "remote ", _B_FALSE
);
2825 if (ifp
->int_act_time
!= NEVER
&&
2826 now
.tv_sec
- ifp
->int_act_time
> EXPIRE_TIME
) {
2827 writelog(LOG_NOTICE
,
2828 "remote interface %s to %s timed out after"
2831 naddr_ntoa(ifp
->int_dstaddr
),
2832 (now
.tv_sec
- ifp
->int_act_time
)/60,
2833 (now
.tv_sec
- ifp
->int_act_time
)%60);
2834 if_sick(ifp
, _B_FALSE
);
2838 * If we have not heard from the other router
2841 if (now
.tv_sec
>= ifp
->int_query_time
) {
2842 ifp
->int_query_time
= NEVER
;
2848 (void) rn_walktree(rhead
, walk_age
, &bad_gate
);
2851 * delete old redirected routes to keep the kernel table small
2852 * and prevent blackholes
2854 del_redirects(bad_gate
, now
.tv_sec
-STALE_TIME
);
2856 /* Update the kernel routing table. */
2859 /* poke reticent remote gateways */
2870 for (i
= 0; i
< KHASH_SIZE
; i
++) {
2871 for (k
= khash_bins
[i
]; k
!= NULL
; k
= k
->k_next
)
2877 static struct interface
*
2878 gwkludge_iflookup(in_addr_t dstaddr
, in_addr_t addr
, in_addr_t mask
)
2881 struct interface
*ifp
;
2883 for (ifp
= ifnet
; ifp
!= NULL
; ifp
= ifp
->int_next
) {
2884 int_state
= ifp
->int_state
;
2886 if (!(int_state
& IS_REMOTE
))
2889 if (ifp
->int_dstaddr
== dstaddr
&& ifp
->int_addr
== addr
&&
2890 ifp
->int_mask
== mask
)
2897 * Lookup logical interface structure given the gateway address.
2898 * Returns null if no interfaces match the given name.
2900 static struct interface
*
2901 lifp_iflookup(in_addr_t addr
, const char *name
)
2903 struct physical_interface
*phyi
;
2904 struct interface
*ifp
;
2905 struct interface
*best
= NULL
;
2907 if ((phyi
= phys_byname(name
)) == NULL
)
2910 for (ifp
= phyi
->phyi_interface
; ifp
!= NULL
;
2911 ifp
= ifp
->int_ilist
.hl_next
) {
2913 #ifdef DEBUG_KERNEL_ROUTE_READ
2914 (void) fprintf(stderr
, " checking interface"
2915 " %-4s %-4s %-15s-->%-15s \n",
2916 phyi
->phyi_name
, ifp
->int_name
,
2917 naddr_ntoa(ifp
->int_addr
),
2918 addrname(((ifp
->int_if_flags
& IFF_POINTOPOINT
) ?
2919 ifp
->int_dstaddr
: htonl(ifp
->int_net
)),
2922 /* Exact match found */
2923 if (addr_on_ifp(addr
, ifp
, &best
))
2926 /* No exact match found but return any best match found */