/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1988, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)radix.c	8.5 (Berkeley) 5/19/95
 * $FreeBSD: /repoman/r/ncvs/src/sys/net/radix.c,v 1.36.2.1 2005/01/31 23:26:23
 */

/*
 * Routines to build and maintain radix trees for routing lookups.
 */
#include <sys/types.h>
#include <sys/param.h>
#ifdef	_KERNEL
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#else
#include <assert.h>
#define	ASSERT assert
#include <stdio.h>
#include <stdlib.h>
#include <syslog.h>
#include <strings.h>
#endif	/* _KERNEL */
#include <net/radix.h>
#ifndef	_KERNEL
void
panic(const char *str)
{
	fprintf(stderr, "Panic - %s\n", str);
	abort();
}
#endif	/* _KERNEL */
static int	rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static int	rn_walktree_mt(struct radix_node_head *, walktree_f_t *,
    void *, lockf_t, lockf_t);
static struct radix_node
	*rn_insert(void *, struct radix_node_head *, int *,
	    struct radix_node [2]),
	*rn_newpair(void *, int, struct radix_node [2]),
	*rn_search(void *, struct radix_node *),
	*rn_search_m(void *, struct radix_node *, void *),
	*rn_lookup(void *, void *, struct radix_node_head *),
	*rn_match(void *, struct radix_node_head *),
	*rn_match_args(void *, struct radix_node_head *, match_leaf_t *,
	    void *),
	*rn_addmask(void *, int, int),
	*rn_addroute(void *, void *, struct radix_node_head *,
	    struct radix_node [2]),
	*rn_delete(void *, void *, struct radix_node_head *);
static boolean_t rn_refines(void *, void *);
/*
 * IPF also uses PATRICIA tree to manage ippools. IPF stores its own structure
 * addrfamily_t. sizeof (addrfamily_t) == 24.
 */
#define	MAX_KEYLEN	24
static int	max_keylen = MAX_KEYLEN;
#ifdef	_KERNEL
static struct kmem_cache *radix_mask_cache; /* for rn_mkfreelist */
static struct kmem_cache *radix_node_cache;
#else
static char *radix_mask_cache, *radix_node_cache; /* dummy vars. never inited */
#endif	/* _KERNEL */

static struct radix_mask *rn_mkfreelist;
static struct radix_node_head *mask_rnhead;
/*
 * Work area -- the following point to 2 buffers of size max_keylen,
 * allocated in this order in a block of memory malloc'ed by rn_init.
 * A third buffer of size MAX_KEYLEN is allocated from the stack.
 */
static char *rn_zeros, *rn_ones;

#define	MKGet(m)	R_Malloc(m, radix_mask_cache, sizeof (struct radix_mask))
#define	MKFree(m)	Free(m, radix_mask_cache)
#define	rn_masktop	(mask_rnhead->rnh_treetop)
static boolean_t rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask
	*rn_new_radix_mask(struct radix_node *tt,
	    struct radix_mask *next);
static boolean_t
	rn_satisfies_leaf(char *trial, struct radix_node *leaf,
	    int skip, match_leaf_t *rn_leaf_fn, void *rn_leaf_arg);

#define	RN_MATCHF(rn, f, arg)	(f == NULL || (*f)((rn), arg))
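/*
 * Illustration (added; not part of the original logic): RN_MATCHF()
 * treats a NULL function pointer as "match everything".  A caller-
 * supplied match_leaf_t (see <net/radix.h> for the exact typedef)
 * shaped like the sketch below is invoked as (*f)(rn, arg):
 *
 *	static boolean_t
 *	match_any(struct radix_node *rn, void *arg)
 *	{
 *		return (B_TRUE);
 *	}
 *
 * so RN_MATCHF(rn, NULL, arg) and RN_MATCHF(rn, match_any, arg) both
 * evaluate to true.
 */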
/*
 * The data structure for the keys is a radix tree with one way
 * branching removed.  The index rn_bit at an internal node n represents a bit
 * position to be tested.  The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_bit - 1.
 * (We say the index of n is rn_bit.)
 *
 * There is at least one descendant which has a one bit at position rn_bit,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask.  We require that the
 * bit-wise logical AND of the key and mask be the key.
 * We define the index of a route associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0, past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If index(m) < rn_bit, this implies the trailing last few bits of k
 * before bit b are all 0 (and hence consequently true of every descendant
 * of n), so the route applies to all descendants of the node as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */
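/*
 * Worked example (added for illustration, ignoring the sockaddr
 * len/family prefix bytes): an IPv4 /24 mask has bytes
 * 0xff 0xff 0xff 0x00.  The first 0 bit is at position 24 (bit 0
 * being the highest-order bit), so index(m) == 24; every bit past
 * position 24 is also 0, so the mask is normal.  A mask such as
 * 0xff 0x00 0xff 0x00 also has index 8, but has 1 bits past
 * position 8, so it is non-normal (non-contiguous).
 */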
/*
 * Most of the functions in this code assume that the key/mask arguments
 * are sockaddr-like structures, where the first byte is an uchar_t
 * indicating the size of the entire structure.
 *
 * To make the assumption more explicit, we use the LEN() macro to access
 * this field. It is safe to pass an expression with side effects
 * to LEN() as the argument is evaluated only once.
 */
#define	LEN(x)	(*(const uchar_t *)(x))
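/*
 * Example (added for illustration): for a hypothetical 16-byte
 * sockaddr-like key laid out as { 0x10, AF_INET, ... }, LEN(key)
 * reads the leading length byte and yields 16.  Because the macro
 * expands its argument exactly once, LEN(p++) would also be safe.
 */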
/*
 * Search a node in the tree matching the key.
 */
static struct radix_node *
rn_search(v_arg, head)
	void *v_arg;
	struct radix_node *head;
{
	struct radix_node *x;
	caddr_t v;

	for (x = head, v = v_arg; x->rn_bit >= 0; ) {
		if (x->rn_bmask & v[x->rn_offset])
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return (x);
}
/*
 * Same as above, but with an additional mask.
 */
static struct radix_node *
rn_search_m(v_arg, head, m_arg)
	struct radix_node *head;
	void *v_arg, *m_arg;
{
	struct radix_node *x;
	caddr_t v = v_arg, m = m_arg;

	for (x = head; x->rn_bit >= 0; ) {
		if ((x->rn_bmask & m[x->rn_offset]) &&
		    (x->rn_bmask & v[x->rn_offset]))
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return (x);
}
/*
 * Returns true if there are no bits set in n_arg that are zero in
 * m_arg and the masks aren't equal.  In other words, it returns true
 * when m_arg is a finer-granularity netmask -- it represents a subset
 * of the destinations implied by n_arg.
 */
static boolean_t
rn_refines(m_arg, n_arg)
	void *m_arg, *n_arg;
{
	caddr_t m = m_arg, n = n_arg;
	caddr_t lim = n + LEN(n), lim2 = lim;
	int longer = LEN(n++) - (int)LEN(m++);
	boolean_t masks_are_equal = B_TRUE;

	if (longer > 0)
		lim -= longer;
	while (n < lim) {
		if (*n & ~(*m))
			return (B_FALSE);
		if (*n++ != *m++)
			masks_are_equal = B_FALSE;
	}
	while (n < lim2)
		if (*n++)
			return (B_FALSE);
	if (masks_are_equal && (longer < 0))
		for (lim2 = m - longer; m < lim2; )
			if (*m++)
				return (B_TRUE);
	return (!masks_are_equal);
}
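/*
 * Example (added for illustration): viewing IPv4 masks as byte
 * strings, rn_refines(255.255.255.240, 255.255.255.0) is true --
 * every bit set in the /24 is also set in the /28 and the masks
 * differ, so the /28 covers a subset of the /24's destinations.
 * rn_refines(m, m) is false: equal masks never refine each other.
 */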
static struct radix_node *
rn_lookup(v_arg, m_arg, head)
	void *v_arg, *m_arg;
	struct radix_node_head *head;
{
	struct radix_node *x;
	caddr_t netmask = NULL;

	if (m_arg) {
		x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
		if (x == NULL)
			return (NULL);
		netmask = x->rn_key;
	}
	x = rn_match(v_arg, head);
	if (x && netmask) {
		while (x && x->rn_mask != netmask)
			x = x->rn_dupedkey;
	}
	return (x);
}
/*
 * Returns true if address 'trial' has no bits differing from the
 * leaf's key when compared under the leaf's mask.  In other words,
 * returns true when 'trial' matches leaf.
 * In addition, if a rn_leaf_fn is passed in, that is used to find
 * a match on conditions defined by the caller of rn_match.  This is
 * used by the kernel ftable to match on IRE_MATCH_* conditions.
 */
static boolean_t
rn_satisfies_leaf(trial, leaf, skip, rn_leaf_fn, rn_leaf_arg)
	char *trial;
	struct radix_node *leaf;
	int skip;
	match_leaf_t *rn_leaf_fn;
	void *rn_leaf_arg;
{
	char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
	char *cplim;
	int length = min(LEN(cp), LEN(cp2));

	if (cp3 == NULL)
		cp3 = rn_ones;
	else
		length = min(length, LEN(cp3));
	cplim = cp + length;
	cp2 += skip;
	cp3 += skip;

	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
		if ((*cp ^ *cp2) & *cp3)
			return (B_FALSE);

	return (RN_MATCHF(leaf, rn_leaf_fn, rn_leaf_arg));
}
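/*
 * Example (added for illustration): for a leaf with key 192.168.1.0
 * and mask 255.255.255.0, a trial address 192.168.1.77 satisfies the
 * leaf: (trial ^ key) is nonzero only in the final byte, and the mask
 * byte there is 0x00, so the masked XOR test above never fires.
 */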
static struct radix_node *
rn_match(v_arg, head)
	void *v_arg;
	struct radix_node_head *head;
{
	return (rn_match_args(v_arg, head, NULL, NULL));
}
static struct radix_node *
rn_match_args(v_arg, head, rn_leaf_fn, rn_leaf_arg)
	void *v_arg;
	struct radix_node_head *head;
	match_leaf_t *rn_leaf_fn;
	void *rn_leaf_arg;
{
	caddr_t v = v_arg;
	struct radix_node *t = head->rnh_treetop, *x;
	caddr_t cp = v, cp2;
	caddr_t cplim;
	struct radix_node *saved_t, *top = t;
	int off = t->rn_offset, vlen = LEN(cp), matched_off;
	int test, b, rn_bit;
	/*
	 * Open code rn_search(v, top) to avoid overhead of extra
	 * subroutine call.
	 */
	for (; t->rn_bit >= 0; ) {
		if (t->rn_bmask & cp[t->rn_offset])
			t = t->rn_right;
		else
			t = t->rn_left;
	}
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask)
		vlen = LEN(t->rn_mask);
	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
	for (; cp < cplim; cp++, cp2++)
		if (*cp != *cp2)
			goto keydiff;
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT)
		t = t->rn_dupedkey;
	if (t == NULL || RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) {
		return (t);
	} else {
		/*
		 * Although we found an exact match on the key, rn_leaf_fn
		 * is looking for some other criteria as well. Continue
		 * looking as if the exact match failed.
		 */
		if (t->rn_dupedkey == NULL &&
		    (t->rn_parent->rn_flags & RNF_ROOT)) {
			/* no more dupedkeys and hit the top. have to give up */
			return (NULL);
		}
		b = 0;
		goto keeplooking;
	}
keydiff:
	test = (*cp ^ *cp2) & 0xff;	/* find first bit that differs */
	for (b = 7; (test >>= 1) > 0; )
		b--;
keeplooking:
	matched_off = cp - v;
	b += matched_off << 3;
	rn_bit = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	if ((saved_t = t)->rn_mask == 0)
		t = t->rn_dupedkey;
	for (; t != NULL; t = t->rn_dupedkey) {
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			if ((rn_bit <= t->rn_bit) &&
			    RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) {
				return (t);
			}
		} else if (rn_satisfies_leaf(v, t, matched_off, rn_leaf_fn,
		    rn_leaf_arg)) {
			return (t);
		}
	}
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;

		t = t->rn_parent;
		m = t->rn_mklist;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		while (m) {
			if (m->rm_flags & RNF_NORMAL) {
				if ((rn_bit <= m->rm_bit) &&
				    RN_MATCHF(m->rm_leaf, rn_leaf_fn,
				    rn_leaf_arg)) {
					return (m->rm_leaf);
				}
			} else {
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(v, t, m->rm_mask);
				while (x != NULL && x->rn_mask != m->rm_mask)
					x = x->rn_dupedkey;
				if (x && rn_satisfies_leaf(v, x, off,
				    rn_leaf_fn, rn_leaf_arg)) {
					return (x);
				}
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return (0);
}
/*
 * Whenever we add a new leaf to the tree, we also add a parent node,
 * so we allocate them as an array of two elements: the first one must be
 * the leaf (see RNTORT() in route.c), the second one is the parent.
 * This routine initializes the relevant fields of the nodes, so that
 * the leaf is the left child of the parent node, and both nodes have
 * (almost) all fields filled as appropriate.
 * The function returns a pointer to the parent node.
 */
static struct radix_node *
rn_newpair(v, b, nodes)
	void *v;
	int b;
	struct radix_node nodes[2];
{
	struct radix_node *tt = nodes, *t = tt + 1;

	t->rn_bit = b;
	t->rn_bmask = 0x80 >> (b & 7);
	t->rn_left = tt;
	t->rn_offset = b >> 3;

	/*
	 * t->rn_parent, t->rn_right, tt->rn_mask, tt->rn_dupedkey
	 * and tt->rn_bmask must have been zeroed by caller.
	 */
	tt->rn_bit = -1;
	tt->rn_key = v;
	tt->rn_parent = t;
	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
	tt->rn_mklist = t->rn_mklist = 0;
	return (t);
}
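/*
 * Usage sketch (added; hypothetical caller): the two nodes are
 * embedded in the caller's route entry and must be zeroed first:
 *
 *	struct radix_node nodes[2];
 *	struct radix_node *t = rn_newpair(key, bit, nodes);
 *	struct radix_node *leaf = t->rn_left;	(i.e., &nodes[0])
 */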
static struct radix_node *
rn_insert(v_arg, head, dupentry, nodes)
	void *v_arg;
	struct radix_node_head *head;
	int *dupentry;
	struct radix_node nodes[2];
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	struct radix_node *p, *x;
	int head_off = top->rn_offset, vlen = (int)LEN(v);
	struct radix_node *t = rn_search(v_arg, top);
	caddr_t cp = v + head_off;
	int b;
	struct radix_node *tt;
	caddr_t cp2 = t->rn_key + head_off;
	int cmp_res;
	caddr_t cplim = v + vlen;
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	while (cp < cplim)
		if (*cp2++ != *cp++)
			goto on1;
	*dupentry = 1;
	return (t);
on1:
	*dupentry = 0;
	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
	/*
	 * (cp - v) is the number of bytes where the match is relevant.
	 * Multiply by 8 to get number of bits. Then reduce this number
	 * by the trailing bits in the last byte where we have a match
	 * by looking at (cmp_res >> 1) in each iteration below.
	 * Note that v starts at the beginning of the key, so, when key
	 * is a sockaddr structure, the preliminary len/family/port bytes
	 * are accounted for.
	 */
	for (b = (cp - v) << 3; cmp_res; b--)
		cmp_res >>= 1;
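	/*
	 * Worked example (added for illustration): if the keys first
	 * differ in byte 5, the scan above leaves cp - v == 6, so b
	 * starts at 48.  With cmp_res == 0x20 the loop shifts six
	 * times, leaving b == 42 == 5 * 8 + 2: bit 2 of byte 5,
	 * counting the high-order bit of each byte as bit 0.
	 */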
	cp = v;
	x = top;
	do {
		p = x;
		if (cp[x->rn_offset] & x->rn_bmask)
			x = x->rn_right;
		else
			x = x->rn_left;
	} while (b > (unsigned)x->rn_bit);
	/* x->rn_bit < b && x->rn_bit >= 0 */
	/*
	 * now the rightmost bit where v and rn_key differ (b) is <
	 * x->rn_bit.
	 *
	 * We will add a new branch at p.  b cannot equal x->rn_bit
	 * because we know we didn't find a duplicated key.
	 * The tree will be re-adjusted so that t is inserted between p
	 * and x.
	 */
	t = rn_newpair(v_arg, b, nodes);
	tt = t->rn_left;
	if ((cp[p->rn_offset] & p->rn_bmask) == 0)
		p->rn_left = t;
	else
		p->rn_right = t;
	x->rn_parent = t;
	t->rn_parent = p;
	if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
		t->rn_right = x;
	} else {
		t->rn_right = tt;
		t->rn_left = x;
	}
	return (tt);
}
static struct radix_node *
rn_addmask(n_arg, search, skip)
	int search, skip;
	void *n_arg;
{
	caddr_t netmask = (caddr_t)n_arg;
	struct radix_node *x;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	int maskduplicated, m0, isnormal;
	struct radix_node *saved_x;
	static int last_zeroed = 0;
	char addmask_key[MAX_KEYLEN];
	if ((mlen = LEN(netmask)) > max_keylen)
		mlen = max_keylen;
	if (skip == 0)
		skip = 1;
	if (mlen <= skip)
		return (mask_rnhead->rnh_nodes);
	if (skip > 1)
		bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
	if ((m0 = mlen) > skip)
		bcopy(netmask + skip, addmask_key + skip, mlen - skip);
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0; )
		cp--;
	mlen = cp - addmask_key;
	if (mlen <= skip) {
		if (m0 >= last_zeroed)
			last_zeroed = mlen;
		return (mask_rnhead->rnh_nodes);
	}
	if (m0 < last_zeroed)
		bzero(addmask_key + m0, last_zeroed - m0);
	*addmask_key = last_zeroed = mlen;
	x = rn_search(addmask_key, rn_masktop);
	if (bcmp(addmask_key, x->rn_key, mlen) != 0)
		x = 0;
	if (x || search)
		return (x);
	R_Zalloc(x, radix_node_cache, max_keylen + 2 * sizeof (*x));
	if ((saved_x = x) == 0)
		return (0);
	netmask = cp = (caddr_t)(x + 2);
	bcopy(addmask_key, cp, mlen);
	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
	if (maskduplicated) {
#ifdef	_KERNEL
		cmn_err(CE_WARN, "rn_addmask: mask impossibly already in tree");
#else
		syslog(LOG_ERR, "rn_addmask: mask impossibly already in tree");
#endif	/* _KERNEL */
		Free(saved_x, radix_node_cache);
		return (x);
	}
	/*
	 * Calculate index of mask, and check for normalcy.
	 * First find the first byte with a 0 bit, then if there are
	 * more bits left (remember we already trimmed the trailing 0's),
	 * the pattern must be one of those in normal_chars[], or we have
	 * a non-contiguous mask.
	 */
	cplim = netmask + mlen;
	isnormal = 1;
	for (cp = netmask + skip; (cp < cplim) && *(uchar_t *)cp == 0xff; )
		cp++;
	if (cp != cplim) {
		static uint8_t normal_chars[] = {
			0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};

		for (j = 0x80; (j & *cp) != 0; j >>= 1)
			b++;
		if (*cp != normal_chars[b] || cp != (cplim - 1))
			isnormal = 0;
	}
	b += (cp - netmask) << 3;
	x->rn_bit = -1 - b;
	if (isnormal)
		x->rn_flags |= RNF_NORMAL;
	return (x);
}
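/*
 * Worked example (added for illustration): for mask bytes
 * 0xff 0xff 0xe0 the scan stops at 0xe0; its three leading 1 bits
 * give b == 3, and normal_chars[3] == 0xe0, so the mask is normal
 * with index 2 * 8 + 3 == 19.  A byte such as 0xa0 never matches
 * normal_chars[b], so a mask containing it is non-contiguous.
 */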
/* arbitrary ordering for non-contiguous masks */
static boolean_t
rn_lexobetter(m_arg, n_arg)
	void *m_arg, *n_arg;
{
	uchar_t *mp = m_arg, *np = n_arg, *lim;

	if (LEN(mp) > LEN(np))
		/* not really, but need to check longer one first */
		return (B_TRUE);
	if (LEN(mp) == LEN(np))
		for (lim = mp + LEN(mp); mp < lim; )
			if (*mp++ > *np++)
				return (B_TRUE);
	return (B_FALSE);
}
static struct radix_mask *
rn_new_radix_mask(tt, next)
	struct radix_node *tt;
	struct radix_mask *next;
{
	struct radix_mask *m;

	MKGet(m);
	if (m == 0) {
#ifndef	_KERNEL
		syslog(LOG_ERR, "Mask for route not entered\n");
#endif	/* _KERNEL */
		return (0);
	}
	bzero(m, sizeof (*m));
	m->rm_bit = tt->rn_bit;
	m->rm_flags = tt->rn_flags;
	if (tt->rn_flags & RNF_NORMAL)
		m->rm_leaf = tt;
	else
		m->rm_mask = tt->rn_mask;
	m->rm_mklist = next;
	tt->rn_mklist = m;
	return (m);
}
static struct radix_node *
rn_addroute(v_arg, n_arg, head, treenodes)
	void *v_arg, *n_arg;
	struct radix_node_head *head;
	struct radix_node treenodes[2];
{
	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
	struct radix_node *t, *x = 0, *tt;
	struct radix_node *saved_tt, *top = head->rnh_treetop;
	short b = 0, b_leaf = 0;
	int keyduplicated;
	caddr_t mmask;
	struct radix_mask *m, **mp;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
			return (0);
		b_leaf = x->rn_bit;
		b = -1 - x->rn_bit;
		netmask = x->rn_key;
	}
	/*
	 * Deal with duplicated keys: attach node to previous instance
	 */
	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
	if (keyduplicated) {
		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
			if (tt->rn_mask == netmask)
				return (0);
			if (netmask == 0 ||
			    (tt->rn_mask &&
			    /* index (netmask) > node */
			    ((b_leaf < tt->rn_bit) ||
			    rn_refines(netmask, tt->rn_mask) ||
			    rn_lexobetter(netmask, tt->rn_mask))))
				break;
		}
		/*
		 * If the mask is not duplicated, we wouldn't
		 * find it among possible duplicate key entries
		 * anyway, so the above test doesn't hurt.
		 *
		 * Insert treenodes before tt.
		 *
		 * We sort the masks for a duplicated key the same way as
		 * in a masklist -- most specific to least specific.
		 * This may require the unfortunate nuisance of relocating
		 * the head of the list.
		 *
		 * We also reverse, or doubly link the list through the
		 * parent pointer.
		 */
		if (tt == saved_tt) {
			struct radix_node *xx = x;
			/* link in at head of list */
			(tt = treenodes)->rn_dupedkey = t;
			tt->rn_flags = t->rn_flags;
			tt->rn_parent = x = t->rn_parent;
			t->rn_parent = tt;	/* parent */
			if (x->rn_left == t)
				x->rn_left = tt;
			else
				x->rn_right = tt;
			saved_tt = tt; x = xx;
		} else {
			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
			t->rn_dupedkey = tt;
			/* Set rn_parent value for tt and tt->rn_dupedkey */
			tt->rn_parent = t;
			if (tt->rn_dupedkey)
				tt->rn_dupedkey->rn_parent = tt;
		}
		tt->rn_key = v;
		tt->rn_bit = -1;
		tt->rn_flags = RNF_ACTIVE;
	}
	/*
	 * Put mask in tree.
	 */
	if (netmask) {
		tt->rn_mask = netmask;
		tt->rn_bit = x->rn_bit;
		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
	}
	/*
	 * At this point the parent-child relationship for p, t, x, tt is
	 * one of several (left/right child) arrangements, with
	 * tt == saved_tt as returned by rn_insert().
	 */
	t = saved_tt->rn_parent;
	if (keyduplicated)
		goto key_exists;
	b_leaf = -1 - t->rn_bit;
	/*
	 * b_leaf is now normalized to be in the leaf rn_bit format
	 * (it is the rn_bit value of a leaf corresponding to netmask
	 * of t->rn_bit).
	 */
	if (t->rn_right == saved_tt)
		x = t->rn_left;
	else
		x = t->rn_right;
	/*
	 * Promote general routes from below.
	 * Identify the less specific netmasks and add them to t->rm_mklist.
	 */
	if (x->rn_bit < 0) {
		/* x is the sibling node. it is a leaf node. */
		for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
			if (x->rn_mask && (x->rn_bit >= b_leaf) &&
			    x->rn_mklist == 0) {
				/*
				 * x is the first node in the dupedkey chain
				 * without a mklist, and with a shorter mask
				 * than b_leaf. Create a radix_mask
				 * corresponding to x's mask and add it to
				 * t's rn_mklist. The mask list gets created
				 * in decreasing order of mask length.
				 */
				*mp = m = rn_new_radix_mask(x, 0);
				if (m)
					mp = &m->rm_mklist;
			}
	} else if (x->rn_mklist) {
		/*
		 * Skip over masks whose index is > that of new node.
		 */
		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
			if (m->rm_bit >= b_leaf)
				break;
		t->rn_mklist = m; *mp = 0;
	}
key_exists:
	/* Add new route to highest possible ancestor's list */
	if ((netmask == 0) || (b > t->rn_bit))
		return (tt); /* can't lift at all */
	b_leaf = tt->rn_bit;
	/* b is the index of the netmask */
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
		if (m->rm_bit < b_leaf)
			continue;
		if (m->rm_bit > b_leaf)
			break;
		if (m->rm_flags & RNF_NORMAL) {
			mmask = m->rm_leaf->rn_mask;
			if (tt->rn_flags & RNF_NORMAL) {
#ifdef	_KERNEL
				cmn_err(CE_WARN, "Non-unique normal route, "
				    "mask not entered\n");
#else
				syslog(LOG_ERR, "Non-unique normal route, "
				    "mask not entered\n");
#endif	/* _KERNEL */
				return (tt);
			}
		} else
			mmask = m->rm_mask;
		if (mmask == netmask) {
			m->rm_refs++;
			tt->rn_mklist = m;
			return (tt);
		}
		if (rn_refines(netmask, mmask) ||
		    rn_lexobetter(netmask, mmask))
			break;
	}
	*mp = rn_new_radix_mask(tt, *mp);
	return (tt);
}
static struct radix_node *
rn_delete(v_arg, netmask_arg, head)
	void *v_arg, *netmask_arg;
	struct radix_node_head *head;
{
	struct radix_node *t, *p, *x, *tt;
	struct radix_mask *m, *saved_m, **mp;
	struct radix_node *dupedkey, *saved_tt, *top;
	caddr_t v, netmask;
	int b, head_off, vlen;

	v = v_arg;
	netmask = netmask_arg;
	x = head->rnh_treetop;
	tt = rn_search(v, x);
	head_off = x->rn_offset;
	vlen = LEN(v);
	saved_tt = tt;
	top = x;
	if (tt == 0 ||
	    bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
		return (0);
	/*
	 * Delete our route from mask lists.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 1, head_off)) == 0)
			return (0);
		netmask = x->rn_key;
		while (tt->rn_mask != netmask)
			if ((tt = tt->rn_dupedkey) == 0)
				return (0);
	}
	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
		goto on1;
	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt || m->rm_refs > 0) {
#ifdef	_KERNEL
			cmn_err(CE_WARN,
			    "rn_delete: inconsistent annotation\n");
#else
			syslog(LOG_ERR, "rn_delete: inconsistent annotation\n");
#endif	/* _KERNEL */
			return (0);	/* dangling ref could cause disaster */
		}
	} else {
		if (m->rm_mask != tt->rn_mask) {
#ifdef	_KERNEL
			cmn_err(CE_WARN,
			    "rn_delete: inconsistent annotation 2\n");
#else
			syslog(LOG_ERR,
			    "rn_delete: inconsistent annotation 2\n");
#endif	/* _KERNEL */
			goto on1;
		}
		if (--m->rm_refs >= 0)
			goto on1;
	}
	b = -1 - tt->rn_bit;
	t = saved_tt->rn_parent;
	if (b > t->rn_bit)
		goto on1;	/* Wasn't lifted at all */
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
		if (m == saved_m) {
			*mp = m->rm_mklist;
			MKFree(m);
			break;
		}
	if (m == 0) {
#ifdef	_KERNEL
		cmn_err(CE_WARN, "rn_delete: couldn't find our annotation\n");
#else
		syslog(LOG_ERR, "rn_delete: couldn't find our annotation\n");
#endif	/* _KERNEL */
		if (tt->rn_flags & RNF_NORMAL)
			return (0);	/* Dangling ref to us */
	}
on1:
	/*
	 * Eliminate us from tree
	 */
	if (tt->rn_flags & RNF_ROOT)
		return (0);
	t = tt->rn_parent;
	dupedkey = saved_tt->rn_dupedkey;
	if (dupedkey) {
		/*
		 * Here, tt is the deletion target and
		 * saved_tt is the head of the dupekey chain.
		 */
		if (tt == saved_tt) {
			/* remove from head of chain */
			x = dupedkey; x->rn_parent = t;
			if (t->rn_left == tt)
				t->rn_left = x;
			else
				t->rn_right = x;
		} else {
			/* find node in front of tt on the chain */
			for (x = p = saved_tt; p && p->rn_dupedkey != tt; )
				p = p->rn_dupedkey;
			if (p) {
				p->rn_dupedkey = tt->rn_dupedkey;
				if (tt->rn_dupedkey)		/* parent */
					tt->rn_dupedkey->rn_parent = p;
			} else {
997 "rn_delete: couldn't find us\n");
1000 "rn_delete: couldn't find us\n");
1001 #endif /* _KERNEL */
1004 if (t
->rn_flags
& RNF_ACTIVE
) {
1007 if (p
->rn_left
== t
)
1011 x
->rn_left
->rn_parent
= x
;
1012 x
->rn_right
->rn_parent
= x
;
1016 if (t
->rn_left
== tt
)
1021 if (p
->rn_right
== t
)
	/*
	 * Demote routes attached to us.
	 */
	if (t->rn_mklist) {
		if (x->rn_bit >= 0) {
			for (mp = &x->rn_mklist; (m = *mp) != NULL; )
				mp = &m->rm_mklist;
			*mp = t->rn_mklist;
		} else {
			/*
			 * If there are any key,mask pairs in a sibling
			 * duped-key chain, some subset will appear sorted
			 * in the same order attached to our mklist.
			 */
			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;
					x->rn_mklist = 0;
					if (--(m->rm_refs) < 0)
						MKFree(m);
					m = mm;
				}
			if (m)
#ifdef	_KERNEL
				cmn_err(CE_WARN,
				    "rn_delete: Orphaned Mask %p at %p\n",
				    (void *)m, (void *)x);
#else
				syslog(LOG_ERR,
				    "rn_delete: Orphaned Mask %p at %p\n",
				    (void *)m, (void *)x);
#endif	/* _KERNEL */
		}
	}
	/*
	 * We may be holding an active internal node in the tree.
	 */
	x = tt + 1;
	if (t != x) {
		*t = *x;
		t->rn_left->rn_parent = t;
		t->rn_right->rn_parent = t;
		p = x->rn_parent;
		if (p->rn_left == x)
			p->rn_left = t;
		else
			p->rn_right = t;
	}
out:
	tt->rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return (tt);
}
/*
 * Walk the radix tree; For the kernel routing table, we hold additional
 * refs on the ire_bucket to ensure that the walk function f() does not
 * run into trashed memory. The kernel routing table is identified by
 * a rnh_treetop that has RNF_SUNW_FT set in the rn_flags.
 * Note that all refs taken in rn_walktree are released before it returns,
 * so f() will need to take any additional references on memory
 * to be passed back to the caller of rn_walktree.
 */
static int
rn_walktree(h, f, w)
	struct radix_node_head *h;
	walktree_f_t *f;
	void *w;
{
	return (rn_walktree_mt(h, f, w, NULL, NULL));
}
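/*
 * Usage sketch (added; hypothetical walker): a walktree_f_t callback
 * is applied to every leaf; a nonzero return value aborts the walk
 * and is passed back to the caller:
 *
 *	static int
 *	count_leaves(struct radix_node *rn, void *arg)
 *	{
 *		(*(uint_t *)arg)++;
 *		return (0);
 *	}
 *
 *	uint_t nleaves = 0;
 *	(void) rn_walktree(rnh, count_leaves, &nleaves);
 */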
static int
rn_walktree_mt(h, f, w, lockf, unlockf)
	struct radix_node_head *h;
	walktree_f_t *f;
	void *w;
	lockf_t lockf, unlockf;
{
	int error;
	struct radix_node *base, *next;
	struct radix_node *rn = h->rnh_treetop;
	boolean_t is_mt = B_FALSE;

	if (lockf != NULL) {
		ASSERT(unlockf != NULL);
		is_mt = B_TRUE;
	}
	/*
	 * This gets complicated because we may delete the node
	 * while applying the function f to it, so we need to calculate
	 * the successor node in advance.
	 */
	RADIX_NODE_HEAD_RLOCK(h);
	/* First time through node, go left */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;

	if (is_mt)
		(*lockf)(rn);

	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn &&
		    (rn->rn_flags & RNF_ROOT) == 0) {
			rn = rn->rn_parent;
		}
		/* Find the next *leaf* since next node might vanish, too */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0; ) {
			rn = rn->rn_left;
		}
		next = rn;

		if (is_mt && next != NULL)
			(*lockf)(next);

		/* Process leaves */
		while ((rn = base) != NULL) {
			base = rn->rn_dupedkey;

			if (is_mt && base != NULL)
				(*lockf)(base);

			RADIX_NODE_HEAD_UNLOCK(h);
			if (!(rn->rn_flags & RNF_ROOT) &&
			    (error = (*f)(rn, w))) {
				if (is_mt) {
					(*unlockf)(rn);
					if (base != NULL)
						(*unlockf)(base);
					if (next != NULL)
						(*unlockf)(next);
				}
				return (error);
			}
			if (is_mt)
				(*unlockf)(rn);
			RADIX_NODE_HEAD_RLOCK(h);
		}
		rn = next;
		if (rn->rn_flags & RNF_ROOT) {
			RADIX_NODE_HEAD_UNLOCK(h);
			/*
			 * no ref to release, since we never take a ref
			 * on the root node -- it can't be deleted.
			 */
			return (0);
		}
	}
	/* NOTREACHED */
}
/*
 * Allocate and initialize an empty tree. This has 3 nodes, which are
 * part of the radix_node_head (in the order <left, root, right>) and are
 * marked RNF_ROOT so they cannot be freed.
 * The leaves have all-zero and all-one keys, with significant
 * bits starting at 'off'.
 * Return 1 on success, 0 on error.
 */
int
rn_inithead(head, off)
	void **head;
	int off;
{
	struct radix_node_head *rnh;
	struct radix_node *t, *tt, *ttt;

	if (*head)
		return (1);
	R_ZallocSleep(rnh, struct radix_node_head *, sizeof (*rnh));
	if (rnh == 0)
		return (0);
	RADIX_NODE_HEAD_LOCK_INIT(rnh);
	*head = rnh;
	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	ttt = rnh->rnh_nodes + 2;
	t->rn_right = ttt;
	t->rn_parent = t;
	tt = t->rn_left;	/* ... which in turn is rnh->rnh_nodes */
	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
	tt->rn_bit = -1 - off;
	*ttt = *tt;
	ttt->rn_key = rn_ones;
	rnh->rnh_addaddr = rn_addroute;
	rnh->rnh_deladdr = rn_delete;
	rnh->rnh_matchaddr = rn_match;
	rnh->rnh_matchaddr_args = rn_match_args;
	rnh->rnh_lookup = rn_lookup;
	rnh->rnh_walktree = rn_walktree;
	rnh->rnh_walktree_mt = rn_walktree_mt;
	rnh->rnh_walktree_from = NULL;	/* not implemented */
	rnh->rnh_treetop = t;
	return (1);
}
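/*
 * Usage sketch (added; hypothetical caller): a table is created once
 * and then driven through the installed function pointers:
 *
 *	struct radix_node_head *rnh = NULL;
 *	if (rn_inithead((void **)&rnh, off) != 0)
 *		(void) rnh->rnh_addaddr(key, mask, rnh, nodes);
 *
 * where 'off' is the byte offset at which significant key bits start
 * (for sockaddr keys, past the leading len/family bytes).
 */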
void
rn_init(void)
{
	char *cp, *cplim;

#ifdef	_KERNEL
	radix_mask_cache = kmem_cache_create("radix_mask",
	    sizeof (struct radix_mask), 0, NULL, NULL, NULL, NULL, NULL, 0);
	radix_node_cache = kmem_cache_create("radix_node",
	    max_keylen + 2 * sizeof (struct radix_node),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif	/* _KERNEL */

	R_ZallocSleep(rn_zeros, char *, 2 * max_keylen);
	ASSERT(rn_zeros != NULL);
	bzero(rn_zeros, 2 * max_keylen);
	rn_ones = cp = rn_zeros + max_keylen;
	cplim = rn_ones + max_keylen;
	while (cp < cplim)
		*cp++ = -1;
	if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
		panic("rn_init: could not init mask_rnhead ");
}
static int
rn_freenode(n, p)
	struct radix_node *n;
	void *p;
{
	struct radix_node_head *rnh = p;
	struct radix_node *d;

	d = rnh->rnh_deladdr(n->rn_key, NULL, rnh);
	if (d != NULL) {
		Free(d, radix_node_cache);
	}
	return (0);
}
void
rn_freehead(rnh)
	struct radix_node_head *rnh;
{
	(void) rn_walktree(rnh, rn_freenode, rnh);

	rnh->rnh_addaddr = NULL;
	rnh->rnh_deladdr = NULL;
	rnh->rnh_matchaddr = NULL;
	rnh->rnh_lookup = NULL;
	rnh->rnh_walktree = NULL;

#ifdef	_KERNEL
	RADIX_NODE_HEAD_DESTROY(rnh);
	FreeHead(rnh, sizeof (*rnh));
#else
	Free(rnh, NULL);
#endif	/* _KERNEL */
}
void
rn_fini(void)
{
	struct radix_mask *m;

	if (rn_zeros != NULL) {
#ifdef	_KERNEL
		FreeHead(rn_zeros, 2 * max_keylen);
#else
		Free(rn_zeros, NULL);
#endif	/* _KERNEL */
		rn_zeros = NULL;
	}

	if (mask_rnhead != NULL) {
		rn_freehead(mask_rnhead);
		mask_rnhead = NULL;
	}

	while ((m = rn_mkfreelist) != NULL) {
		rn_mkfreelist = m->rm_mklist;
		MKFree(m);
	}
}
;