Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sbin / routed / table.c
blobdc6bed8b5b9594eccdfeb2ccae55e2eae8045c63
1 /* $NetBSD: table.c,v 1.23 2008/12/28 20:15:21 christos Exp $ */
3 /*
4 * Copyright (c) 1983, 1988, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgment:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
36 #include "defs.h"
38 #ifdef __NetBSD__
39 __RCSID("$NetBSD: table.c,v 1.23 2008/12/28 20:15:21 christos Exp $");
40 #elif defined(__FreeBSD__)
41 __RCSID("$FreeBSD$");
42 #else
43 __RCSID("Revision: 2.27 ");
44 #ident "Revision: 2.27 "
45 #endif
47 static struct rt_spare *rts_better(struct rt_entry *);
48 static struct rt_spare rts_empty = {0,0,0,HOPCNT_INFINITY,0,0,0};
49 static void set_need_flash(void);
50 #ifdef _HAVE_SIN_LEN
51 static void masktrim(struct sockaddr_in *ap);
52 #else
53 static void masktrim(struct sockaddr_in_new *ap);
54 #endif
57 struct radix_node_head *rhead; /* root of the radix tree */
59 int need_flash = 1; /* flash update needed
60 * start =1 to suppress the 1st
63 struct timeval age_timer; /* next check of old routes */
64 struct timeval need_kern = { /* need to update kernel table */
65 EPOCH+MIN_WAITTIME-1, 0
68 int stopint;
70 int total_routes;
72 /* zap any old routes through this gateway */
73 naddr age_bad_gate;
76 /* It is desirable to "aggregate" routes, to combine differing routes of
77 * the same metric and next hop into a common route with a smaller netmask
78 * or to suppress redundant routes, routes that add no information to
79 * routes with smaller netmasks.
81 * A route is redundant if and only if any and all routes with smaller
82 * but matching netmasks and nets are the same. Since routes are
83 * kept sorted in the radix tree, redundant routes always come second.
85 * There are two kinds of aggregations. First, two routes of the same bit
86 * mask and differing only in the least significant bit of the network
87 * number can be combined into a single route with a coarser mask.
89 * Second, a route can be suppressed in favor of another route with a more
90 * coarse mask provided no incompatible routes with intermediate masks
91 * are present. The second kind of aggregation involves suppressing routes.
92 * A route must not be suppressed if an incompatible route exists with
93 * an intermediate mask, since the suppressed route would be covered
94 * by the intermediate.
96 * This code relies on the radix tree walk encountering routes
97 * sorted first by address, with the smallest address first.
100 struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
102 /* #define DEBUG_AG */
103 #ifdef DEBUG_AG
104 #define CHECK_AG() {int acnt = 0; struct ag_info *cag; \
105 for (cag = ag_avail; cag != 0; cag = cag->ag_fine) \
106 acnt++; \
107 for (cag = ag_corsest; cag != 0; cag = cag->ag_fine) \
108 acnt++; \
109 if (acnt != NUM_AG_SLOTS) { \
110 (void)fflush(stderr); \
111 abort(); \
114 #else
115 #define CHECK_AG()
116 #endif
119 /* Output the contents of an aggregation table slot.
120 * This function must always be immediately followed with the deletion
121 * of the target slot.
123 static void
124 ag_out(struct ag_info *ag,
125 void (*out)(struct ag_info *))
127 struct ag_info *ag_cors;
128 naddr bit;
131 /* Forget it if this route should not be output for split-horizon. */
132 if (ag->ag_state & AGS_SPLIT_HZ)
133 return;
135 /* If we output both the even and odd twins, then the immediate parent,
136 * if it is present, is redundant, unless the parent manages to
137 * aggregate into something coarser.
138 * On successive calls, this code detects the even and odd twins,
139 * and marks the parent.
141 * Note that the order in which the radix tree code emits routes
142 * ensures that the twins are seen before the parent is emitted.
144 ag_cors = ag->ag_cors;
145 if (ag_cors != 0
146 && ag_cors->ag_mask == ag->ag_mask<<1
147 && ag_cors->ag_dst_h == (ag->ag_dst_h & ag_cors->ag_mask)) {
148 ag_cors->ag_state |= ((ag_cors->ag_dst_h == ag->ag_dst_h)
149 ? AGS_REDUN0
150 : AGS_REDUN1);
153 /* Skip it if this route is itself redundant.
155 * It is ok to change the contents of the slot here, since it is
156 * always deleted next.
158 if (ag->ag_state & AGS_REDUN0) {
159 if (ag->ag_state & AGS_REDUN1)
160 return; /* quit if fully redundant */
161 /* make it finer if it is half-redundant */
162 bit = (-ag->ag_mask) >> 1;
163 ag->ag_dst_h |= bit;
164 ag->ag_mask |= bit;
166 } else if (ag->ag_state & AGS_REDUN1) {
167 /* make it finer if it is half-redundant */
168 bit = (-ag->ag_mask) >> 1;
169 ag->ag_mask |= bit;
171 out(ag);
175 static void
176 ag_del(struct ag_info *ag)
178 CHECK_AG();
180 if (ag->ag_cors == 0)
181 ag_corsest = ag->ag_fine;
182 else
183 ag->ag_cors->ag_fine = ag->ag_fine;
185 if (ag->ag_fine == 0)
186 ag_finest = ag->ag_cors;
187 else
188 ag->ag_fine->ag_cors = ag->ag_cors;
190 ag->ag_fine = ag_avail;
191 ag_avail = ag;
193 CHECK_AG();
197 /* Flush routes waiting for aggregation.
198 * This must not suppress a route unless it is known that among all
199 * routes with coarser masks that match it, the one with the longest
200 * mask is appropriate. This is ensured by scanning the routes
201 * in lexical order, and with the most restrictive mask first
202 * among routes to the same destination.
204 void
205 ag_flush(naddr lim_dst_h, /* flush routes to here */
206 naddr lim_mask, /* matching this mask */
207 void (*out)(struct ag_info *))
209 struct ag_info *ag, *ag_cors;
210 naddr dst_h;
213 for (ag = ag_finest;
214 ag != 0 && ag->ag_mask >= lim_mask;
215 ag = ag_cors) {
216 ag_cors = ag->ag_cors;
218 /* work on only the specified routes */
219 dst_h = ag->ag_dst_h;
220 if ((dst_h & lim_mask) != lim_dst_h)
221 continue;
223 if (!(ag->ag_state & AGS_SUPPRESS))
224 ag_out(ag, out);
226 else for ( ; ; ag_cors = ag_cors->ag_cors) {
227 /* Look for a route that can suppress the
228 * current route */
229 if (ag_cors == 0) {
230 /* failed, so output it and look for
231 * another route to work on
233 ag_out(ag, out);
234 break;
237 if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
238 /* We found a route with a coarser mask that
239 * aggregates the current target.
241 * If it has a different next hop, it
242 * cannot replace the target, so output
243 * the target.
245 if (ag->ag_gate != ag_cors->ag_gate
246 && !(ag->ag_state & AGS_FINE_GATE)
247 && !(ag_cors->ag_state & AGS_CORS_GATE)) {
248 ag_out(ag, out);
249 break;
252 /* If the coarse route has a good enough
253 * metric, it suppresses the target.
254 * If the suppressed target was redundant,
255 * then mark the suppressor redundant.
257 if (ag_cors->ag_pref <= ag->ag_pref) {
258 if (AG_IS_REDUN(ag->ag_state)
259 && ag_cors->ag_mask==ag->ag_mask<<1) {
260 if (ag_cors->ag_dst_h == dst_h)
261 ag_cors->ag_state |= AGS_REDUN0;
262 else
263 ag_cors->ag_state |= AGS_REDUN1;
265 if (ag->ag_tag != ag_cors->ag_tag)
266 ag_cors->ag_tag = 0;
267 if (ag->ag_nhop != ag_cors->ag_nhop)
268 ag_cors->ag_nhop = 0;
269 break;
274 /* That route has either been output or suppressed */
275 ag_cors = ag->ag_cors;
276 ag_del(ag);
279 CHECK_AG();
283 /* Try to aggregate a route with previous routes.
285 void
286 ag_check(naddr dst,
287 naddr mask,
288 naddr gate,
289 naddr nhop,
290 char metric,
291 char pref,
292 u_int new_seqno,
293 u_short tag,
294 u_short state,
295 void (*out)(struct ag_info *)) /* output using this */
297 struct ag_info *ag, *nag, *ag_cors;
298 naddr xaddr;
299 int x;
301 dst = ntohl(dst);
303 /* Punt non-contiguous subnet masks.
305 * (X & -X) contains a single bit if and only if X is a power of 2.
306 * (X + (X & -X)) == 0 if and only if X is a power of 2.
308 if ((mask & -mask) + mask != 0) {
309 struct ag_info nc_ag;
311 nc_ag.ag_dst_h = dst;
312 nc_ag.ag_mask = mask;
313 nc_ag.ag_gate = gate;
314 nc_ag.ag_nhop = nhop;
315 nc_ag.ag_metric = metric;
316 nc_ag.ag_pref = pref;
317 nc_ag.ag_tag = tag;
318 nc_ag.ag_state = state;
319 nc_ag.ag_seqno = new_seqno;
320 out(&nc_ag);
321 return;
324 /* Search for the right slot in the aggregation table.
326 ag_cors = 0;
327 ag = ag_corsest;
328 while (ag != 0) {
329 if (ag->ag_mask >= mask)
330 break;
332 /* Suppress old routes (i.e. combine with compatible routes
333 * with coarser masks) as we look for the right slot in the
334 * aggregation table for the new route.
335 * A route to an address less than the current destination
336 * will not be affected by the current route or any route
337 * seen hereafter. That means it is safe to suppress it.
338 * This check keeps poor routes (e.g. with large hop counts)
339 * from preventing suppression of finer routes.
341 if (ag_cors != 0
342 && ag->ag_dst_h < dst
343 && (ag->ag_state & AGS_SUPPRESS)
344 && ag_cors->ag_pref <= ag->ag_pref
345 && (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
346 && (ag_cors->ag_gate == ag->ag_gate
347 || (ag->ag_state & AGS_FINE_GATE)
348 || (ag_cors->ag_state & AGS_CORS_GATE))) {
349 /* If the suppressed target was redundant,
350 * then mark the suppressor redundant.
352 if (AG_IS_REDUN(ag->ag_state)
353 && ag_cors->ag_mask == ag->ag_mask<<1) {
354 if (ag_cors->ag_dst_h == dst)
355 ag_cors->ag_state |= AGS_REDUN0;
356 else
357 ag_cors->ag_state |= AGS_REDUN1;
359 if (ag->ag_tag != ag_cors->ag_tag)
360 ag_cors->ag_tag = 0;
361 if (ag->ag_nhop != ag_cors->ag_nhop)
362 ag_cors->ag_nhop = 0;
363 ag_del(ag);
364 CHECK_AG();
365 } else {
366 ag_cors = ag;
368 ag = ag_cors->ag_fine;
371 /* If we find the even/odd twin of the new route, and if the
372 * masks and so forth are equal, we can aggregate them.
373 * We can probably promote one of the pair.
375 * Since the routes are encountered in lexical order,
376 * the new route must be odd. However, the second or later
377 * times around this loop, it could be the even twin promoted
378 * from the even/odd pair of twins of the finer route.
380 while (ag != 0
381 && ag->ag_mask == mask
382 && ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
384 /* Here we know the target route and the route in the current
385 * slot have the same netmasks and differ by at most the
386 * last bit. They are either for the same destination, or
387 * for an even/odd pair of destinations.
389 if (ag->ag_dst_h == dst) {
390 /* We have two routes to the same destination.
391 * Routes are encountered in lexical order, so a
392 * route is never promoted until the parent route is
393 * already present. So we know that the new route is
394 * a promoted (or aggregated) pair and the route
395 * already in the slot is the explicit route.
397 * Prefer the best route if their metrics differ,
398 * or the aggregated one if not, following a sort
399 * of longest-match rule.
401 if (pref <= ag->ag_pref) {
402 ag->ag_gate = gate;
403 ag->ag_nhop = nhop;
404 ag->ag_tag = tag;
405 ag->ag_metric = metric;
406 ag->ag_pref = pref;
407 if (ag->ag_seqno < new_seqno)
408 ag->ag_seqno = new_seqno;
409 x = ag->ag_state;
410 ag->ag_state = state;
411 state = x;
414 /* Some bits are set if they are set on either route,
415 * except when the route is for an interface.
417 if (!(ag->ag_state & AGS_IF))
418 ag->ag_state |= (state & (AGS_AGGREGATE_EITHER
419 | AGS_REDUN0
420 | AGS_REDUN1));
421 return;
424 /* If one of the routes can be promoted and the other can
425 * be suppressed, it may be possible to combine them or
426 * worthwhile to promote one.
428 * Any route that can be promoted is always
429 * marked to be eligible to be suppressed.
431 if (!((state & AGS_AGGREGATE)
432 && (ag->ag_state & AGS_SUPPRESS))
433 && !((ag->ag_state & AGS_AGGREGATE)
434 && (state & AGS_SUPPRESS)))
435 break;
437 /* A pair of even/odd twin routes can be combined
438 * if either is redundant, or if they are via the
439 * same gateway and have the same metric.
441 if (AG_IS_REDUN(ag->ag_state)
442 || AG_IS_REDUN(state)
443 || (ag->ag_gate == gate
444 && ag->ag_pref == pref
445 && (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
447 /* We have both the even and odd pairs.
448 * Since the routes are encountered in order,
449 * the route in the slot must be the even twin.
451 * Combine and promote (aggregate) the pair of routes.
453 if (new_seqno < ag->ag_seqno)
454 new_seqno = ag->ag_seqno;
455 if (!AG_IS_REDUN(state))
456 state &= ~AGS_REDUN1;
457 if (AG_IS_REDUN(ag->ag_state))
458 state |= AGS_REDUN0;
459 else
460 state &= ~AGS_REDUN0;
461 state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
462 if (ag->ag_tag != tag)
463 tag = 0;
464 if (ag->ag_nhop != nhop)
465 nhop = 0;
467 /* Get rid of the even twin that was already
468 * in the slot.
470 ag_del(ag);
472 } else if (ag->ag_pref >= pref
473 && (ag->ag_state & AGS_AGGREGATE)) {
474 /* If we cannot combine the pair, maybe the route
475 * with the worse metric can be promoted.
477 * Promote the old, even twin, by giving its slot
478 * in the table to the new, odd twin.
480 ag->ag_dst_h = dst;
482 xaddr = ag->ag_gate;
483 ag->ag_gate = gate;
484 gate = xaddr;
486 xaddr = ag->ag_nhop;
487 ag->ag_nhop = nhop;
488 nhop = xaddr;
490 x = ag->ag_tag;
491 ag->ag_tag = tag;
492 tag = x;
494 /* The promoted route is even-redundant only if the
495 * even twin was fully redundant. It is not
496 * odd-redundant because the odd-twin will still be
497 * in the table.
499 x = ag->ag_state;
500 if (!AG_IS_REDUN(x))
501 x &= ~AGS_REDUN0;
502 x &= ~AGS_REDUN1;
503 ag->ag_state = state;
504 state = x;
506 x = ag->ag_metric;
507 ag->ag_metric = metric;
508 metric = x;
510 x = ag->ag_pref;
511 ag->ag_pref = pref;
512 pref = x;
514 /* take the newest sequence number */
515 if (new_seqno <= ag->ag_seqno)
516 new_seqno = ag->ag_seqno;
517 else
518 ag->ag_seqno = new_seqno;
520 } else {
521 if (!(state & AGS_AGGREGATE))
522 break; /* cannot promote either twin */
524 /* Promote the new, odd twin by shaving its
525 * mask and address.
526 * The promoted route is odd-redundant only if the
527 * odd twin was fully redundant. It is not
528 * even-redundant because the even twin is still in
529 * the table.
531 if (!AG_IS_REDUN(state))
532 state &= ~AGS_REDUN1;
533 state &= ~AGS_REDUN0;
534 if (new_seqno < ag->ag_seqno)
535 new_seqno = ag->ag_seqno;
536 else
537 ag->ag_seqno = new_seqno;
540 mask <<= 1;
541 dst &= mask;
543 if (ag_cors == 0) {
544 ag = ag_corsest;
545 break;
547 ag = ag_cors;
548 ag_cors = ag->ag_cors;
551 /* When we can no longer promote and combine routes,
552 * flush the old route in the target slot. Also flush
553 * any finer routes that we know will never be aggregated by
554 * the new route.
556 * In case we moved toward coarser masks,
557 * get back where we belong
559 if (ag != 0
560 && ag->ag_mask < mask) {
561 ag_cors = ag;
562 ag = ag->ag_fine;
565 /* Empty the target slot
567 if (ag != 0 && ag->ag_mask == mask) {
568 ag_flush(ag->ag_dst_h, ag->ag_mask, out);
569 ag = (ag_cors == 0) ? ag_corsest : ag_cors->ag_fine;
572 #ifdef DEBUG_AG
573 (void)fflush(stderr);
574 if (ag == 0 && ag_cors != ag_finest)
575 abort();
576 if (ag_cors == 0 && ag != ag_corsest)
577 abort();
578 if (ag != 0 && ag->ag_cors != ag_cors)
579 abort();
580 if (ag_cors != 0 && ag_cors->ag_fine != ag)
581 abort();
582 CHECK_AG();
583 #endif
585 /* Save the new route on the end of the table.
587 nag = ag_avail;
588 ag_avail = nag->ag_fine;
590 nag->ag_dst_h = dst;
591 nag->ag_mask = mask;
592 nag->ag_gate = gate;
593 nag->ag_nhop = nhop;
594 nag->ag_metric = metric;
595 nag->ag_pref = pref;
596 nag->ag_tag = tag;
597 nag->ag_state = state;
598 nag->ag_seqno = new_seqno;
600 nag->ag_fine = ag;
601 if (ag != 0)
602 ag->ag_cors = nag;
603 else
604 ag_finest = nag;
605 nag->ag_cors = ag_cors;
606 if (ag_cors == 0)
607 ag_corsest = nag;
608 else
609 ag_cors->ag_fine = nag;
610 CHECK_AG();
/* Map a routing-socket message type to a printable name.
 *
 * Types 1..N index the table below; zero and anything past the end of
 * the table are formatted into a static buffer, which is overwritten by
 * the next such call.
 */
static const char *
rtm_type_name(u_char type)
{
	static const char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
#ifdef RTM_OIFINFO
		"RTM_OIFINFO",
#endif
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
#define NEW_RTM_PAT "RTM type %#x"
	static char name0[sizeof(NEW_RTM_PAT)+2];
	const size_t known = sizeof(rtm_types)/sizeof(rtm_types[0]);


	/* The table is 1-based: type 1 is entry 0. */
	if (type != 0 && type <= known)
		return rtm_types[type-1];

	snprintf(name0, sizeof(name0), NEW_RTM_PAT, type);
	return name0;
#undef NEW_RTM_PAT
}
653 /* Trim a mask in a sockaddr
654 * Produce a length of 0 for an address of 0.
655 * Otherwise produce the index of the first zero byte.
657 void
658 #ifdef _HAVE_SIN_LEN
659 masktrim(struct sockaddr_in *ap)
660 #else
661 masktrim(struct sockaddr_in_new *ap)
662 #endif
664 char *cp;
666 if (ap->sin_addr.s_addr == 0) {
667 ap->sin_len = 0;
668 return;
670 cp = (char *)(&ap->sin_addr.s_addr+1);
671 while (*--cp == 0)
672 continue;
673 ap->sin_len = cp - (char*)ap + 1;
677 /* Tell the kernel to add, delete or change a route
679 static void
680 rtioctl(int action, /* RTM_DELETE, etc */
681 naddr dst,
682 naddr gate,
683 naddr mask,
684 int metric,
685 int flags)
687 struct {
688 struct rt_msghdr w_rtm;
689 struct sockaddr_in w_dst;
690 struct sockaddr_in w_gate;
691 #ifdef _HAVE_SA_LEN
692 struct sockaddr_in w_mask;
693 #else
694 struct sockaddr_in_new w_mask;
695 #endif
696 } w;
697 long cc;
698 # define PAT " %-10s %s metric=%d flags=%#x"
699 # define ARGS rtm_type_name(action), rtname(dst,mask,gate), metric, flags
701 again:
702 memset(&w, 0, sizeof(w));
703 w.w_rtm.rtm_msglen = sizeof(w);
704 w.w_rtm.rtm_version = RTM_VERSION;
705 w.w_rtm.rtm_type = action;
706 w.w_rtm.rtm_flags = flags;
707 w.w_rtm.rtm_seq = ++rt_sock_seqno;
708 w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
709 if (metric != 0 || action == RTM_CHANGE) {
710 w.w_rtm.rtm_rmx.rmx_hopcount = metric;
711 w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
713 w.w_dst.sin_family = AF_INET;
714 w.w_dst.sin_addr.s_addr = dst;
715 w.w_gate.sin_family = AF_INET;
716 w.w_gate.sin_addr.s_addr = gate;
717 #ifdef _HAVE_SA_LEN
718 w.w_dst.sin_len = sizeof(w.w_dst);
719 w.w_gate.sin_len = sizeof(w.w_gate);
720 #endif
721 if (mask == HOST_MASK) {
722 w.w_rtm.rtm_flags |= RTF_HOST;
723 w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
724 } else {
725 w.w_rtm.rtm_addrs |= RTA_NETMASK;
726 w.w_mask.sin_addr.s_addr = htonl(mask);
727 #ifdef _HAVE_SA_LEN
728 masktrim(&w.w_mask);
729 if (w.w_mask.sin_len == 0)
730 w.w_mask.sin_len = sizeof(long);
731 w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
732 #endif
735 #ifndef NO_INSTALL
736 cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
737 if (cc < 0) {
738 if (errno == ESRCH
739 && (action == RTM_CHANGE || action == RTM_DELETE)) {
740 trace_act("route disappeared before" PAT, ARGS);
741 if (action == RTM_CHANGE) {
742 action = RTM_ADD;
743 goto again;
745 return;
747 msglog("write(rt_sock)" PAT ": %s", ARGS, strerror(errno));
748 return;
749 } else if (cc != w.w_rtm.rtm_msglen) {
750 msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
751 cc, w.w_rtm.rtm_msglen, ARGS);
752 return;
754 #endif
755 if (TRACEKERNEL)
756 trace_misc("write kernel" PAT, ARGS);
757 #undef PAT
758 #undef ARGS
762 #define KHASH_SIZE 71 /* should be prime */
763 #define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
764 static struct khash {
765 struct khash *k_next;
766 naddr k_dst;
767 naddr k_mask;
768 naddr k_gate;
769 short k_metric;
770 u_short k_state;
771 #define KS_NEW 0x001
772 #define KS_DELETE 0x002 /* need to delete the route */
773 #define KS_ADD 0x004 /* add to the kernel */
774 #define KS_CHANGE 0x008 /* tell kernel to change the route */
775 #define KS_DEL_ADD 0x010 /* delete & add to change the kernel */
776 #define KS_STATIC 0x020 /* Static flag in kernel */
777 #define KS_GATEWAY 0x040 /* G flag in kernel */
778 #define KS_DYNAMIC 0x080 /* result of redirect */
779 #define KS_DELETED 0x100 /* already deleted from kernel */
780 #define KS_CHECK 0x200
781 time_t k_keep;
782 #define K_KEEP_LIM 30
783 time_t k_redirect_time; /* when redirected route 1st seen */
784 } *khash_bins[KHASH_SIZE];
787 static struct khash*
788 kern_find(naddr dst, naddr mask, struct khash ***ppk)
790 struct khash *k, **pk;
792 for (pk = &KHASH(dst,mask); (k = *pk) != 0; pk = &k->k_next) {
793 if (k->k_dst == dst && k->k_mask == mask)
794 break;
796 if (ppk != 0)
797 *ppk = pk;
798 return k;
802 static struct khash*
803 kern_add(naddr dst, naddr mask)
805 struct khash *k, **pk;
807 k = kern_find(dst, mask, &pk);
808 if (k != 0)
809 return k;
811 k = (struct khash *)rtmalloc(sizeof(*k), "kern_add");
813 memset(k, 0, sizeof(*k));
814 k->k_dst = dst;
815 k->k_mask = mask;
816 k->k_state = KS_NEW;
817 k->k_keep = now.tv_sec;
818 *pk = k;
820 return k;
824 /* If a kernel route has a non-zero metric, check that it is still in the
825 * daemon table, and not deleted by interfaces coming and going.
827 static void
828 kern_check_static(struct khash *k,
829 struct interface *ifp)
831 struct rt_entry *rt;
832 struct rt_spare new;
834 if (k->k_metric == 0)
835 return;
837 memset(&new, 0, sizeof(new));
838 new.rts_ifp = ifp;
839 new.rts_gate = k->k_gate;
840 new.rts_router = (ifp != 0) ? ifp->int_addr : loopaddr;
841 new.rts_metric = k->k_metric;
842 new.rts_time = now.tv_sec;
844 rt = rtget(k->k_dst, k->k_mask);
845 if (rt != 0) {
846 if (!(rt->rt_state & RS_STATIC))
847 rtchange(rt, rt->rt_state | RS_STATIC, &new, 0);
848 } else {
849 rtadd(k->k_dst, k->k_mask, RS_STATIC, &new);
854 /* operate on a kernel entry
856 static void
857 kern_ioctl(struct khash *k,
858 int action, /* RTM_DELETE, etc */
859 int flags)
862 switch (action) {
863 case RTM_DELETE:
864 k->k_state &= ~KS_DYNAMIC;
865 if (k->k_state & KS_DELETED)
866 return;
867 k->k_state |= KS_DELETED;
868 break;
869 case RTM_ADD:
870 k->k_state &= ~KS_DELETED;
871 break;
872 case RTM_CHANGE:
873 if (k->k_state & KS_DELETED) {
874 action = RTM_ADD;
875 k->k_state &= ~KS_DELETED;
877 break;
880 rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_metric, flags);
884 /* add a route the kernel told us
886 static void
887 rtm_add(struct rt_msghdr *rtm,
888 struct rt_addrinfo *info,
889 time_t keep)
891 struct khash *k;
892 struct interface *ifp;
893 naddr mask;
896 if (rtm->rtm_flags & RTF_HOST) {
897 mask = HOST_MASK;
898 } else if (INFO_MASK(info) != 0) {
899 mask = ntohl(S_ADDR(INFO_MASK(info)));
900 } else {
901 msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
902 return;
905 k = kern_add(S_ADDR(INFO_DST(info)), mask);
906 if (k->k_state & KS_NEW)
907 k->k_keep = now.tv_sec+keep;
908 if (INFO_GATE(info) == 0) {
909 trace_act("note %s without gateway",
910 rtm_type_name(rtm->rtm_type));
911 k->k_metric = HOPCNT_INFINITY;
912 } else if (INFO_GATE(info)->sa_family != AF_INET) {
913 trace_act("note %s with gateway AF=%d",
914 rtm_type_name(rtm->rtm_type),
915 INFO_GATE(info)->sa_family);
916 k->k_metric = HOPCNT_INFINITY;
917 } else {
918 k->k_gate = S_ADDR(INFO_GATE(info));
919 k->k_metric = rtm->rtm_rmx.rmx_hopcount;
920 if (k->k_metric < 0)
921 k->k_metric = 0;
922 else if (k->k_metric > HOPCNT_INFINITY-1)
923 k->k_metric = HOPCNT_INFINITY-1;
925 k->k_state &= ~(KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD
926 | KS_DELETED | KS_GATEWAY | KS_STATIC
927 | KS_NEW | KS_CHECK);
928 if (rtm->rtm_flags & RTF_GATEWAY)
929 k->k_state |= KS_GATEWAY;
930 if (rtm->rtm_flags & RTF_STATIC)
931 k->k_state |= KS_STATIC;
933 if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
934 if (INFO_AUTHOR(info) != 0
935 && INFO_AUTHOR(info)->sa_family == AF_INET)
936 ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
937 else
938 ifp = 0;
939 if (supplier
940 && (ifp == 0 || !(ifp->int_state & IS_REDIRECT_OK))) {
941 /* Routers are not supposed to listen to redirects,
942 * so delete it if it came via an unknown interface
943 * or the interface does not have special permission.
945 k->k_state &= ~KS_DYNAMIC;
946 k->k_state |= KS_DELETE;
947 LIM_SEC(need_kern, 0);
948 trace_act("mark for deletion redirected %s --> %s"
949 " via %s",
950 addrname(k->k_dst, k->k_mask, 0),
951 naddr_ntoa(k->k_gate),
952 ifp ? ifp->int_name : "unknown interface");
953 } else {
954 k->k_state |= KS_DYNAMIC;
955 k->k_redirect_time = now.tv_sec;
956 trace_act("accept redirected %s --> %s via %s",
957 addrname(k->k_dst, k->k_mask, 0),
958 naddr_ntoa(k->k_gate),
959 ifp ? ifp->int_name : "unknown interface");
961 return;
964 /* If it is not a static route, quit until the next comparison
965 * between the kernel and daemon tables, when it will be deleted.
967 if (!(k->k_state & KS_STATIC)) {
968 k->k_state |= KS_DELETE;
969 LIM_SEC(need_kern, k->k_keep);
970 return;
973 /* Put static routes with real metrics into the daemon table so
974 * they can be advertised.
976 * Find the interface toward the gateway.
978 ifp = iflookup(k->k_gate);
979 if (ifp == 0)
980 msglog("static route %s --> %s impossibly lacks ifp",
981 addrname(S_ADDR(INFO_DST(info)), mask, 0),
982 naddr_ntoa(k->k_gate));
984 kern_check_static(k, ifp);
988 /* deal with packet loss
990 static void
991 rtm_lose(struct rt_msghdr *rtm,
992 struct rt_addrinfo *info)
994 if (INFO_GATE(info) == 0
995 || INFO_GATE(info)->sa_family != AF_INET) {
996 trace_act("ignore %s without gateway",
997 rtm_type_name(rtm->rtm_type));
998 return;
1001 if (rdisc_ok)
1002 rdisc_age(S_ADDR(INFO_GATE(info)));
1003 age(S_ADDR(INFO_GATE(info)));
1007 /* Make the gateway slot of an info structure point to something
1008 * useful. If it is not already useful, but it specifies an interface,
1009 * then fill in the sockaddr_in provided and point it there.
1011 static int
1012 get_info_gate(const struct sockaddr **sap,
1013 struct sockaddr_in *rsin)
1015 const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)*sap;
1016 struct interface *ifp;
1018 if (sdl == 0)
1019 return 0;
1020 if ((sdl)->sdl_family == AF_INET)
1021 return 1;
1022 if ((sdl)->sdl_family != AF_LINK)
1023 return 0;
1025 ifp = ifwithindex(sdl->sdl_index, 1);
1026 if (ifp == 0)
1027 return 0;
1029 rsin->sin_addr.s_addr = ifp->int_addr;
1030 #ifdef _HAVE_SA_LEN
1031 rsin->sin_len = sizeof(*rsin);
1032 #endif
1033 rsin->sin_family = AF_INET;
1034 *sap = (const struct sockaddr*)rsin;
1036 return 1;
1040 /* Clean the kernel table by copying it to the daemon image.
1041 * Eventually the daemon will delete any extra routes.
1043 void
1044 flush_kern(void)
1046 static char *sysctl_buf;
1047 static size_t sysctl_buf_size = 0;
1048 size_t needed;
1049 int mib[6];
1050 char *next, *lim;
1051 struct rt_msghdr *rtm;
1052 struct sockaddr_in gate_sin;
1053 struct rt_addrinfo info;
1054 int i;
1055 struct khash *k;
1058 for (i = 0; i < KHASH_SIZE; i++) {
1059 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1060 k->k_state |= KS_CHECK;
1064 mib[0] = CTL_NET;
1065 mib[1] = PF_ROUTE;
1066 mib[2] = 0; /* protocol */
1067 mib[3] = 0; /* wildcard address family */
1068 mib[4] = NET_RT_DUMP;
1069 mib[5] = 0; /* no flags */
1070 for (;;) {
1071 if ((needed = sysctl_buf_size) != 0) {
1072 if (sysctl(mib, 6, sysctl_buf,&needed, 0, 0) >= 0)
1073 break;
1074 if (errno != ENOMEM && errno != EFAULT)
1075 BADERR(1,"flush_kern: sysctl(RT_DUMP)");
1076 free(sysctl_buf);
1077 needed = 0;
1079 if (sysctl(mib, 6, 0, &needed, 0, 0) < 0)
1080 BADERR(1,"flush_kern: sysctl(RT_DUMP) estimate");
1081 /* Kludge around the habit of some systems, such as
1082 * BSD/OS 3.1, to not admit how many routes are in the
1083 * kernel, or at least to be quite wrong.
1085 needed += 50*(sizeof(*rtm)+5*sizeof(struct sockaddr));
1086 sysctl_buf = rtmalloc(sysctl_buf_size = needed,
1087 "flush_kern sysctl(RT_DUMP)");
1090 lim = sysctl_buf + needed;
1091 for (next = sysctl_buf; next < lim; next += rtm->rtm_msglen) {
1092 rtm = (struct rt_msghdr *)next;
1093 if (rtm->rtm_msglen == 0) {
1094 msglog("zero length kernel route at "
1095 " %#lx in buffer %#lx before %#lx",
1096 (u_long)rtm, (u_long)sysctl_buf, (u_long)lim);
1097 break;
1100 rt_xaddrs(&info,
1101 (struct sockaddr *)(rtm+1),
1102 (struct sockaddr *)(next + rtm->rtm_msglen),
1103 rtm->rtm_addrs);
1105 if (INFO_DST(&info) == 0
1106 || INFO_DST(&info)->sa_family != AF_INET)
1107 continue;
1109 /* ignore ARP table entries on systems with a merged route
1110 * and ARP table.
1112 if (rtm->rtm_flags & RTF_LLINFO)
1113 continue;
1115 /* ignore cloned routes
1117 #if defined(RTF_CLONED) && defined(__bsdi__)
1118 if (rtm->rtm_flags & RTF_CLONED)
1119 continue;
1120 #endif
1121 #if defined(RTF_WASCLONED) && defined(__FreeBSD__)
1122 if (rtm->rtm_flags & RTF_WASCLONED)
1123 continue;
1124 #endif
1127 /* ignore multicast addresses
1129 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
1130 continue;
1132 if (!get_info_gate(&INFO_GATE(&info), &gate_sin))
1133 continue;
1135 /* Note static routes and interface routes, and also
1136 * preload the image of the kernel table so that
1137 * we can later clean it, as well as avoid making
1138 * unneeded changes. Keep the old kernel routes for a
1139 * few seconds to allow a RIP or router-discovery
1140 * response to be heard.
1142 rtm_add(rtm,&info,MIN_WAITTIME);
1145 for (i = 0; i < KHASH_SIZE; i++) {
1146 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1147 if (k->k_state & KS_CHECK) {
1148 msglog("%s --> %s disappeared from kernel",
1149 addrname(k->k_dst, k->k_mask, 0),
1150 naddr_ntoa(k->k_gate));
1151 del_static(k->k_dst, k->k_mask, k->k_gate, 1);
1158 /* Listen to announcements from the kernel
1160 void
1161 read_rt(void)
1163 long cc;
1164 struct interface *ifp;
1165 struct sockaddr_in gate_sin;
1166 naddr mask, gate;
1167 union {
1168 struct {
1169 struct rt_msghdr rtm;
1170 struct sockaddr addrs[RTAX_MAX];
1171 } r;
1172 struct if_msghdr ifm;
1173 } m;
1174 char str[100], *strp;
1175 struct rt_addrinfo info;
1178 for (;;) {
1179 cc = read(rt_sock, &m, sizeof(m));
1180 if (cc <= 0) {
1181 if (cc < 0 && errno != EWOULDBLOCK)
1182 LOGERR("read(rt_sock)");
1183 return;
1186 if (m.r.rtm.rtm_version != RTM_VERSION) {
1187 msglog("bogus routing message version %d",
1188 m.r.rtm.rtm_version);
1189 continue;
1192 /* Ignore our own results.
1194 if (m.r.rtm.rtm_type <= RTM_CHANGE
1195 && m.r.rtm.rtm_pid == mypid) {
1196 static int complained = 0;
1197 if (!complained) {
1198 msglog("receiving our own change messages");
1199 complained = 1;
1201 continue;
1204 if (m.r.rtm.rtm_type == RTM_IFINFO
1205 || m.r.rtm.rtm_type == RTM_NEWADDR
1206 || m.r.rtm.rtm_type == RTM_DELADDR) {
1207 ifp = ifwithindex(m.ifm.ifm_index,
1208 m.r.rtm.rtm_type != RTM_DELADDR);
1209 if (ifp == 0)
1210 trace_act("note %s with flags %#x"
1211 " for unknown interface index #%d",
1212 rtm_type_name(m.r.rtm.rtm_type),
1213 m.ifm.ifm_flags,
1214 m.ifm.ifm_index);
1215 else
1216 trace_act("note %s with flags %#x for %s",
1217 rtm_type_name(m.r.rtm.rtm_type),
1218 m.ifm.ifm_flags,
1219 ifp->int_name);
1221 /* After being informed of a change to an interface,
1222 * check them all now if the check would otherwise
1223 * be a long time from now, if the interface is
1224 * not known, or if the interface has been turned
1225 * off or on.
1227 if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
1228 || ifp == 0
1229 || ((ifp->int_if_flags ^ m.ifm.ifm_flags)
1230 & IFF_UP) != 0)
1231 ifinit_timer.tv_sec = now.tv_sec;
1232 continue;
1234 #ifdef RTM_OIFINFO
1235 if (m.r.rtm.rtm_type == RTM_OIFINFO)
1236 continue; /* ignore compat message */
1237 #endif
1239 strlcpy(str, rtm_type_name(m.r.rtm.rtm_type), sizeof(str));
1240 strp = &str[strlen(str)];
1241 if (m.r.rtm.rtm_type <= RTM_CHANGE) {
1242 snprintf(strp, str + sizeof(str) - strp,
1243 " from pid %d",m.r.rtm.rtm_pid);
1244 strp += strlen(strp);
1247 rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
1248 m.r.rtm.rtm_addrs);
1250 if (INFO_DST(&info) == 0) {
1251 trace_act("ignore %s without dst", str);
1252 continue;
1255 if (INFO_DST(&info)->sa_family != AF_INET) {
1256 trace_act("ignore %s for AF %d", str,
1257 INFO_DST(&info)->sa_family);
1258 continue;
1261 mask = ((INFO_MASK(&info) != 0)
1262 ? ntohl(S_ADDR(INFO_MASK(&info)))
1263 : (m.r.rtm.rtm_flags & RTF_HOST)
1264 ? HOST_MASK
1265 : std_mask(S_ADDR(INFO_DST(&info))));
1267 snprintf(strp, str + sizeof(str) - strp, ": %s",
1268 addrname(S_ADDR(INFO_DST(&info)), mask, 0));
1269 strp += strlen(strp);
1271 if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
1272 trace_act("ignore multicast %s", str);
1273 continue;
1276 if (m.r.rtm.rtm_flags & RTF_LLINFO) {
1277 trace_act("ignore ARP %s", str);
1278 continue;
1281 #if defined(RTF_CLONED) && defined(__bsdi__)
1282 if (m.r.rtm.rtm_flags & RTF_CLONED) {
1283 trace_act("ignore cloned %s", str);
1284 continue;
1286 #endif
1287 #if defined(RTF_WASCLONED) && defined(__FreeBSD__)
1288 if (m.r.rtm.rtm_flags & RTF_WASCLONED) {
1289 trace_act("ignore cloned %s", str);
1290 continue;
1292 #endif
1294 if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
1295 gate = S_ADDR(INFO_GATE(&info));
1296 snprintf(strp, str + sizeof(str) - strp,
1297 " --> %s", naddr_ntoa(gate));
1298 strp += strlen(strp);
1299 } else {
1300 gate = 0;
1303 if (INFO_AUTHOR(&info) != 0)
1304 snprintf(strp, str + sizeof(str) - strp,
1305 " by authority of %s",
1306 saddr_ntoa(INFO_AUTHOR(&info)));
1307 strp += strlen(strp);
1309 switch (m.r.rtm.rtm_type) {
1310 case RTM_ADD:
1311 case RTM_CHANGE:
1312 case RTM_REDIRECT:
1313 if (m.r.rtm.rtm_errno != 0) {
1314 trace_act("ignore %s with \"%s\" error",
1315 str, strerror(m.r.rtm.rtm_errno));
1316 } else {
1317 trace_act("%s", str);
1318 rtm_add(&m.r.rtm,&info,0);
1320 break;
1322 case RTM_DELETE:
1323 if (m.r.rtm.rtm_errno != 0
1324 && m.r.rtm.rtm_errno != ESRCH) {
1325 trace_act("ignore %s with \"%s\" error",
1326 str, strerror(m.r.rtm.rtm_errno));
1327 } else {
1328 trace_act("%s", str);
1329 del_static(S_ADDR(INFO_DST(&info)), mask,
1330 gate, 1);
1332 break;
1334 case RTM_LOSING:
1335 trace_act("%s", str);
1336 rtm_lose(&m.r.rtm,&info);
1337 break;
1339 default:
1340 trace_act("ignore %s", str);
1341 break;
1347 /* after aggregating, note routes that belong in the kernel
1349 static void
1350 kern_out(struct ag_info *ag)
1352 struct khash *k;
1355 /* Do not install bad routes if they are not already present.
1356 * This includes routes that had RS_NET_SYN for interfaces that
1357 * recently died.
1359 if (ag->ag_metric == HOPCNT_INFINITY) {
1360 k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
1361 if (k == 0)
1362 return;
1363 } else {
1364 k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
1367 if (k->k_state & KS_NEW) {
1368 /* will need to add new entry to the kernel table */
1369 k->k_state = KS_ADD;
1370 if (ag->ag_state & AGS_GATEWAY)
1371 k->k_state |= KS_GATEWAY;
1372 k->k_gate = ag->ag_gate;
1373 k->k_metric = ag->ag_metric;
1374 return;
1377 if (k->k_state & KS_STATIC)
1378 return;
1380 /* modify existing kernel entry if necessary */
1381 if (k->k_gate != ag->ag_gate
1382 || k->k_metric != ag->ag_metric) {
1383 /* Must delete bad interface routes etc. to change them. */
1384 if (k->k_metric == HOPCNT_INFINITY)
1385 k->k_state |= KS_DEL_ADD;
1386 k->k_gate = ag->ag_gate;
1387 k->k_metric = ag->ag_metric;
1388 k->k_state |= KS_CHANGE;
1391 /* If the daemon thinks the route should exist, forget
1392 * about any redirections.
1393 * If the daemon thinks the route should exist, eventually
1394 * override manual intervention by the operator.
1396 if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
1397 k->k_state &= ~KS_DYNAMIC;
1398 k->k_state |= (KS_ADD | KS_DEL_ADD);
1401 if ((k->k_state & KS_GATEWAY)
1402 && !(ag->ag_state & AGS_GATEWAY)) {
1403 k->k_state &= ~KS_GATEWAY;
1404 k->k_state |= (KS_ADD | KS_DEL_ADD);
1405 } else if (!(k->k_state & KS_GATEWAY)
1406 && (ag->ag_state & AGS_GATEWAY)) {
1407 k->k_state |= KS_GATEWAY;
1408 k->k_state |= (KS_ADD | KS_DEL_ADD);
1411 /* Deleting-and-adding is necessary to change aspects of a route.
1412 * Just delete instead of deleting and then adding a bad route.
1413 * Otherwise, we want to keep the route in the kernel.
1415 if (k->k_metric == HOPCNT_INFINITY
1416 && (k->k_state & KS_DEL_ADD))
1417 k->k_state |= KS_DELETE;
1418 else
1419 k->k_state &= ~KS_DELETE;
1420 #undef RT
1424 /* ARGSUSED */
1425 static int
1426 walk_kern(struct radix_node *rn,
1427 struct walkarg *argp UNUSED)
1429 #define RT ((struct rt_entry *)rn)
1430 char metric, pref;
1431 u_int ags = 0;
1434 /* Do not install synthetic routes */
1435 if (RT->rt_state & RS_NET_SYN)
1436 return 0;
1438 if (!(RT->rt_state & RS_IF)) {
1439 /* This is an ordinary route, not for an interface.
1442 /* aggregate, ordinary good routes without regard to
1443 * their metric
1445 pref = 1;
1446 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1448 /* Do not install host routes directly to hosts, to avoid
1449 * interfering with ARP entries in the kernel table.
1451 if (RT_ISHOST(RT)
1452 && ntohl(RT->rt_dst) == RT->rt_gate)
1453 return 0;
1455 } else {
1456 /* This is an interface route.
1457 * Do not install routes for "external" remote interfaces.
1459 if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
1460 return 0;
1462 /* Interfaces should override received routes.
1464 pref = 0;
1465 ags |= (AGS_IF | AGS_CORS_GATE);
1467 /* If it is not an interface, or an alias for an interface,
1468 * it must be a "gateway."
1470 * If it is a "remote" interface, it is also a "gateway" to
1471 * the kernel if is not a alias.
1473 if (RT->rt_ifp == 0
1474 || (RT->rt_ifp->int_state & IS_REMOTE))
1475 ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
1478 /* If RIP is off and IRDP is on, let the route to the discovered
1479 * route suppress any RIP routes. Eventually the RIP routes
1480 * will time-out and be deleted. This reaches the steady-state
1481 * quicker.
1483 if ((RT->rt_state & RS_RDISC) && rip_sock < 0)
1484 ags |= AGS_CORS_GATE;
1486 metric = RT->rt_metric;
1487 if (metric == HOPCNT_INFINITY) {
1488 /* if the route is dead, so try hard to aggregate. */
1489 pref = HOPCNT_INFINITY;
1490 ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
1491 ags &= ~(AGS_IF | AGS_CORS_GATE);
1494 ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
1495 metric,pref, 0, 0, ags, kern_out);
1496 return 0;
1497 #undef RT
1501 /* Update the kernel table to match the daemon table.
1503 static void
1504 fix_kern(void)
1506 int i;
1507 struct khash *k, **pk;
1510 need_kern = age_timer;
1512 /* Walk daemon table, updating the copy of the kernel table.
1514 (void)rn_walktree(rhead, walk_kern, 0);
1515 ag_flush(0,0,kern_out);
1517 for (i = 0; i < KHASH_SIZE; i++) {
1518 for (pk = &khash_bins[i]; (k = *pk) != 0; ) {
1519 /* Do not touch static routes */
1520 if (k->k_state & KS_STATIC) {
1521 kern_check_static(k,0);
1522 pk = &k->k_next;
1523 continue;
1526 /* check hold on routes deleted by the operator */
1527 if (k->k_keep > now.tv_sec) {
1528 /* ensure we check when the hold is over */
1529 LIM_SEC(need_kern, k->k_keep);
1530 /* mark for the next cycle */
1531 k->k_state |= KS_DELETE;
1532 pk = &k->k_next;
1533 continue;
1536 if ((k->k_state & KS_DELETE)
1537 && !(k->k_state & KS_DYNAMIC)) {
1538 kern_ioctl(k, RTM_DELETE, 0);
1539 *pk = k->k_next;
1540 free(k);
1541 continue;
1544 if (k->k_state & KS_DEL_ADD)
1545 kern_ioctl(k, RTM_DELETE, 0);
1547 if (k->k_state & KS_ADD) {
1548 kern_ioctl(k, RTM_ADD,
1549 ((0 != (k->k_state & (KS_GATEWAY
1550 | KS_DYNAMIC)))
1551 ? RTF_GATEWAY : 0));
1552 } else if (k->k_state & KS_CHANGE) {
1553 kern_ioctl(k, RTM_CHANGE,
1554 ((0 != (k->k_state & (KS_GATEWAY
1555 | KS_DYNAMIC)))
1556 ? RTF_GATEWAY : 0));
1558 k->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);
1560 /* Mark this route to be deleted in the next cycle.
1561 * This deletes routes that disappear from the
1562 * daemon table, since the normal aging code
1563 * will clear the bit for routes that have not
1564 * disappeared from the daemon table.
1566 k->k_state |= KS_DELETE;
1567 pk = &k->k_next;
1573 /* Delete a static route in the image of the kernel table.
1575 void
1576 del_static(naddr dst,
1577 naddr mask,
1578 naddr gate,
1579 int gone)
1581 struct khash *k;
1582 struct rt_entry *rt;
1584 /* Just mark it in the table to be deleted next time the kernel
1585 * table is updated.
1586 * If it has already been deleted, mark it as such, and set its
1587 * keep-timer so that it will not be deleted again for a while.
1588 * This lets the operator delete a route added by the daemon
1589 * and add a replacement.
1591 k = kern_find(dst, mask, 0);
1592 if (k != 0 && (gate == 0 || k->k_gate == gate)) {
1593 k->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
1594 k->k_state |= KS_DELETE;
1595 if (gone) {
1596 k->k_state |= KS_DELETED;
1597 k->k_keep = now.tv_sec + K_KEEP_LIM;
1601 rt = rtget(dst, mask);
1602 if (rt != 0 && (rt->rt_state & RS_STATIC))
1603 rtbad(rt);
1607 /* Delete all routes generated from ICMP Redirects that use a given gateway,
1608 * as well as old redirected routes.
1610 void
1611 del_redirects(naddr bad_gate,
1612 time_t old)
1614 int i;
1615 struct khash *k;
1618 for (i = 0; i < KHASH_SIZE; i++) {
1619 for (k = khash_bins[i]; k != 0; k = k->k_next) {
1620 if (!(k->k_state & KS_DYNAMIC)
1621 || (k->k_state & KS_STATIC))
1622 continue;
1624 if (k->k_gate != bad_gate
1625 && k->k_redirect_time > old
1626 && !supplier)
1627 continue;
1629 k->k_state |= KS_DELETE;
1630 k->k_state &= ~KS_DYNAMIC;
1631 need_kern.tv_sec = now.tv_sec;
1632 trace_act("mark redirected %s --> %s for deletion",
1633 addrname(k->k_dst, k->k_mask, 0),
1634 naddr_ntoa(k->k_gate));
1640 /* Start the daemon tables.
1642 extern int max_keylen;
1644 void
1645 rtinit(void)
1647 int i;
1648 struct ag_info *ag;
1650 /* Initialize the radix trees */
1651 max_keylen = sizeof(struct sockaddr_in);
1652 rn_init();
1653 rn_inithead((void*)&rhead, 32);
1655 /* mark all of the slots in the table free */
1656 ag_avail = ag_slots;
1657 for (ag = ag_slots, i = 1; i < NUM_AG_SLOTS; i++) {
1658 ag->ag_fine = ag+1;
1659 ag++;
1664 #ifdef _HAVE_SIN_LEN
1665 static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET, 0, {0}, {0}};
1666 static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET, 0, {0}, {0}};
1667 #else
1668 static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
1669 static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
1670 #endif
1673 static void
1674 set_need_flash(void)
1676 if (!need_flash) {
1677 need_flash = 1;
1678 /* Do not send the flash update immediately. Wait a little
1679 * while to hear from other routers.
1681 no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
1686 /* Get a particular routing table entry
1688 struct rt_entry *
1689 rtget(naddr dst, naddr mask)
1691 struct rt_entry *rt;
1693 dst_sock.sin_addr.s_addr = dst;
1694 mask_sock.sin_addr.s_addr = htonl(mask);
1695 masktrim(&mask_sock);
1696 rt = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
1697 if (!rt
1698 || rt->rt_dst != dst
1699 || rt->rt_mask != mask)
1700 return 0;
1702 return rt;
1706 /* Find a route to dst as the kernel would.
1708 struct rt_entry *
1709 rtfind(naddr dst)
1711 dst_sock.sin_addr.s_addr = dst;
1712 return (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
1716 /* add a route to the table
1718 void
1719 rtadd(naddr dst,
1720 naddr mask,
1721 u_int state, /* rt_state for the entry */
1722 struct rt_spare *new)
1724 struct rt_entry *rt;
1725 naddr smask;
1726 int i;
1727 struct rt_spare *rts;
1729 rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
1730 memset(rt, 0, sizeof(*rt));
1731 for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
1732 rts->rts_metric = HOPCNT_INFINITY;
1734 rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
1735 rt->rt_dst = dst;
1736 rt->rt_dst_sock.sin_family = AF_INET;
1737 #ifdef _HAVE_SIN_LEN
1738 rt->rt_dst_sock.sin_len = dst_sock.sin_len;
1739 #endif
1740 if (mask != HOST_MASK) {
1741 smask = std_mask(dst);
1742 if ((smask & ~mask) == 0 && mask > smask)
1743 state |= RS_SUBNET;
1745 mask_sock.sin_addr.s_addr = htonl(mask);
1746 masktrim(&mask_sock);
1747 rt->rt_mask = mask;
1748 rt->rt_state = state;
1749 rt->rt_spares[0] = *new;
1750 rt->rt_time = now.tv_sec;
1751 rt->rt_poison_metric = HOPCNT_INFINITY;
1752 rt->rt_seqno = update_seqno;
1754 if (++total_routes == MAX_ROUTES)
1755 msglog("have maximum (%d) routes", total_routes);
1756 if (TRACEACTIONS)
1757 trace_add_del("Add", rt);
1759 need_kern.tv_sec = now.tv_sec;
1760 set_need_flash();
1762 if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
1763 rhead, rt->rt_nodes)) {
1764 msglog("rnh_addaddr() failed for %s mask=%#lx",
1765 naddr_ntoa(dst), (u_long)mask);
1766 free(rt);
1771 /* notice a changed route
1773 void
1774 rtchange(struct rt_entry *rt,
1775 u_int state, /* new state bits */
1776 struct rt_spare *new,
1777 char *label)
1779 if (rt->rt_metric != new->rts_metric) {
1780 /* Fix the kernel immediately if it seems the route
1781 * has gone bad, since there may be a working route that
1782 * aggregates this route.
1784 if (new->rts_metric == HOPCNT_INFINITY) {
1785 need_kern.tv_sec = now.tv_sec;
1786 if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
1787 new->rts_time = now.tv_sec - EXPIRE_TIME;
1789 rt->rt_seqno = update_seqno;
1790 set_need_flash();
1793 if (rt->rt_gate != new->rts_gate) {
1794 need_kern.tv_sec = now.tv_sec;
1795 rt->rt_seqno = update_seqno;
1796 set_need_flash();
1799 state |= (rt->rt_state & RS_SUBNET);
1801 /* Keep various things from deciding ageless routes are stale.
1803 if (!AGE_RT(state, new->rts_ifp))
1804 new->rts_time = now.tv_sec;
1806 if (TRACEACTIONS)
1807 trace_change(rt, state, new,
1808 label ? label : "Chg ");
1810 rt->rt_state = state;
1811 rt->rt_spares[0] = *new;
1815 /* check for a better route among the spares
1817 static struct rt_spare *
1818 rts_better(struct rt_entry *rt)
1820 struct rt_spare *rts, *rts1;
1821 int i;
1823 /* find the best alternative among the spares */
1824 rts = rt->rt_spares+1;
1825 for (i = NUM_SPARES, rts1 = rts+1; i > 2; i--, rts1++) {
1826 if (BETTER_LINK(rt,rts1,rts))
1827 rts = rts1;
1830 return rts;
1834 /* switch to a backup route
1836 void
1837 rtswitch(struct rt_entry *rt,
1838 struct rt_spare *rts)
1840 struct rt_spare swap;
1841 char label[20];
1843 /* Do not change permanent routes */
1844 if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
1845 | RS_NET_SYN | RS_IF)))
1846 return;
1848 /* find the best alternative among the spares */
1849 if (rts == 0)
1850 rts = rts_better(rt);
1852 /* Do not bother if it is not worthwhile.
1854 if (!BETTER_LINK(rt, rts, rt->rt_spares))
1855 return;
1857 swap = rt->rt_spares[0];
1858 (void)snprintf(label, sizeof(label), "Use #%d",
1859 (int)(rts - rt->rt_spares));
1860 rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC), rts, label);
1861 if (swap.rts_metric == HOPCNT_INFINITY) {
1862 *rts = rts_empty;
1863 } else {
1864 *rts = swap;
1869 void
1870 rtdelete(struct rt_entry *rt)
1872 struct khash *k;
1875 if (TRACEACTIONS)
1876 trace_add_del("Del", rt);
1878 k = kern_find(rt->rt_dst, rt->rt_mask, 0);
1879 if (k != 0) {
1880 k->k_state |= KS_DELETE;
1881 need_kern.tv_sec = now.tv_sec;
1884 dst_sock.sin_addr.s_addr = rt->rt_dst;
1885 mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
1886 masktrim(&mask_sock);
1887 if (rt != (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
1888 rhead)) {
1889 msglog("rnh_deladdr() failed");
1890 } else {
1891 free(rt);
1892 total_routes--;
1897 void
1898 rts_delete(struct rt_entry *rt,
1899 struct rt_spare *rts)
1901 trace_upslot(rt, rts, &rts_empty);
1902 *rts = rts_empty;
1906 /* Get rid of a bad route, and try to switch to a replacement.
1908 void
1909 rtbad(struct rt_entry *rt)
1911 struct rt_spare new;
1913 /* Poison the route */
1914 new = rt->rt_spares[0];
1915 new.rts_metric = HOPCNT_INFINITY;
1916 rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC), &new, 0);
1917 rtswitch(rt, 0);
1921 /* Junk a RS_NET_SYN or RS_LOCAL route,
1922 * unless it is needed by another interface.
1924 void
1925 rtbad_sub(struct rt_entry *rt)
1927 struct interface *ifp, *ifp1;
1928 struct intnet *intnetp;
1929 u_int state;
1932 ifp1 = 0;
1933 state = 0;
1935 if (rt->rt_state & RS_LOCAL) {
1936 /* Is this the route through loopback for the interface?
1937 * If so, see if it is used by any other interfaces, such
1938 * as a point-to-point interface with the same local address.
1940 for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1941 /* Retain it if another interface needs it.
1943 if (ifp->int_addr == rt->rt_ifp->int_addr) {
1944 state |= RS_LOCAL;
1945 ifp1 = ifp;
1946 break;
1952 if (!(state & RS_LOCAL)) {
1953 /* Retain RIPv1 logical network route if there is another
1954 * interface that justifies it.
1956 if (rt->rt_state & RS_NET_SYN) {
1957 for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
1958 if ((ifp->int_state & IS_NEED_NET_SYN)
1959 && rt->rt_mask == ifp->int_std_mask
1960 && rt->rt_dst == ifp->int_std_addr) {
1961 state |= RS_NET_SYN;
1962 ifp1 = ifp;
1963 break;
1968 /* or if there is an authority route that needs it. */
1969 for (intnetp = intnets;
1970 intnetp != 0;
1971 intnetp = intnetp->intnet_next) {
1972 if (intnetp->intnet_addr == rt->rt_dst
1973 && intnetp->intnet_mask == rt->rt_mask) {
1974 state |= (RS_NET_SYN | RS_NET_INT);
1975 break;
1980 if (ifp1 != 0 || (state & RS_NET_SYN)) {
1981 struct rt_spare new = rt->rt_spares[0];
1982 new.rts_ifp = ifp1;
1983 rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
1984 &new, 0);
1985 } else {
1986 rtbad(rt);
1991 /* Called while walking the table looking for sick interfaces
1992 * or after a time change.
1994 /* ARGSUSED */
1996 walk_bad(struct radix_node *rn,
1997 struct walkarg *argp UNUSED)
1999 #define RT ((struct rt_entry *)rn)
2000 struct rt_spare *rts;
2001 int i;
2004 /* fix any spare routes through the interface
2006 rts = RT->rt_spares;
2007 for (i = NUM_SPARES; i != 1; i--) {
2008 rts++;
2009 if (rts->rts_metric < HOPCNT_INFINITY
2010 && (rts->rts_ifp == 0
2011 || (rts->rts_ifp->int_state & IS_BROKE)))
2012 rts_delete(RT, rts);
2015 /* Deal with the main route
2017 /* finished if it has been handled before or if its interface is ok
2019 if (RT->rt_ifp == 0 || !(RT->rt_ifp->int_state & IS_BROKE))
2020 return 0;
2022 /* Bad routes for other than interfaces are easy.
2024 if (0 == (RT->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))) {
2025 rtbad(RT);
2026 return 0;
2029 rtbad_sub(RT);
2030 return 0;
2031 #undef RT
2035 /* Check the age of an individual route.
2037 /* ARGSUSED */
2038 static int
2039 walk_age(struct radix_node *rn,
2040 struct walkarg *argp UNUSED)
2042 #define RT ((struct rt_entry *)rn)
2043 struct interface *ifp;
2044 struct rt_spare *rts;
2045 int i;
2048 /* age all of the spare routes, including the primary route
2049 * currently in use
2051 rts = RT->rt_spares;
2052 for (i = NUM_SPARES; i != 0; i--, rts++) {
2054 ifp = rts->rts_ifp;
2055 if (i == NUM_SPARES) {
2056 if (!AGE_RT(RT->rt_state, ifp)) {
2057 /* Keep various things from deciding ageless
2058 * routes are stale
2060 rts->rts_time = now.tv_sec;
2061 continue;
2064 /* forget RIP routes after RIP has been turned off.
2066 if (rip_sock < 0) {
2067 rtdelete(RT);
2068 return 0;
2072 /* age failing routes
2074 if (age_bad_gate == rts->rts_gate
2075 && rts->rts_time >= now_stale) {
2076 rts->rts_time -= SUPPLY_INTERVAL;
2079 /* trash the spare routes when they go bad */
2080 if (rts->rts_metric < HOPCNT_INFINITY
2081 && now_garbage > rts->rts_time
2082 && i != NUM_SPARES)
2083 rts_delete(RT, rts);
2087 /* finished if the active route is still fresh */
2088 if (now_stale <= RT->rt_time)
2089 return 0;
2091 /* try to switch to an alternative */
2092 rtswitch(RT, 0);
2094 /* Delete a dead route after it has been publically mourned. */
2095 if (now_garbage > RT->rt_time) {
2096 rtdelete(RT);
2097 return 0;
2100 /* Start poisoning a bad route before deleting it. */
2101 if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
2102 struct rt_spare new = RT->rt_spares[0];
2103 new.rts_metric = HOPCNT_INFINITY;
2104 rtchange(RT, RT->rt_state, &new, 0);
2106 return 0;
2110 /* Watch for dead routes and interfaces.
2112 void
2113 age(naddr bad_gate)
2115 struct interface *ifp;
2116 int need_query = 0;
2118 /* If not listening to RIP, there is no need to age the routes in
2119 * the table.
2121 age_timer.tv_sec = (now.tv_sec
2122 + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));
2124 /* Check for dead IS_REMOTE interfaces by timing their
2125 * transmissions.
2127 for (ifp = ifnet; ifp; ifp = ifp->int_next) {
2128 if (!(ifp->int_state & IS_REMOTE))
2129 continue;
2131 /* ignore unreachable remote interfaces */
2132 if (!check_remote(ifp))
2133 continue;
2135 /* Restore remote interface that has become reachable
2137 if (ifp->int_state & IS_BROKE)
2138 if_ok(ifp, "remote ");
2140 if (ifp->int_act_time != NEVER
2141 && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
2142 msglog("remote interface %s to %s timed out after"
2143 " %lld:%lld",
2144 ifp->int_name,
2145 naddr_ntoa(ifp->int_dstaddr),
2146 (long long)(now.tv_sec - ifp->int_act_time)/60,
2147 (long long)(now.tv_sec - ifp->int_act_time)%60);
2148 if_sick(ifp);
2151 /* If we have not heard from the other router
2152 * recently, ask it.
2154 if (now.tv_sec >= ifp->int_query_time) {
2155 ifp->int_query_time = NEVER;
2156 need_query = 1;
2160 /* Age routes. */
2161 age_bad_gate = bad_gate;
2162 (void)rn_walktree(rhead, walk_age, 0);
2164 /* delete old redirected routes to keep the kernel table small
2165 * and prevent blackholes
2167 del_redirects(bad_gate, now.tv_sec-STALE_TIME);
2169 /* Update the kernel routing table. */
2170 fix_kern();
2172 /* poke reticent remote gateways */
2173 if (need_query)
2174 rip_query();