import less(1)
[unleashed/tickless.git] / usr / src / common / net / patricia / radix.c
blobcf2085280fa0564af4c8917625e994148e35d9f6
1 /*
2 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
5 * Copyright (c) 1988, 1989, 1993
6 * The Regents of the University of California. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 * @(#)radix.c 8.5 (Berkeley) 5/19/95
33 * $FreeBSD: /repoman/r/ncvs/src/sys/net/radix.c,v 1.36.2.1 2005/01/31 23:26:23
34 * imp Exp $
39 * Routines to build and maintain radix trees for routing lookups.
41 #include <sys/types.h>
43 #ifndef _RADIX_H_
44 #include <sys/param.h>
45 #ifdef _KERNEL
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/systm.h>
49 #include <sys/cmn_err.h>
50 #else
51 #include <assert.h>
52 #define ASSERT assert
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <syslog.h>
56 #include <strings.h>
57 #endif /* _KERNEL */
58 #include <net/radix.h>
59 #endif
61 #ifndef _KERNEL
62 void
63 panic(const char *str)
65 fprintf(stderr, "Panic - %s\n", str);
66 abort();
68 #endif /* _KERNEL */
70 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
71 static int rn_walktree_mt(struct radix_node_head *, walktree_f_t *,
72 void *, lockf_t, lockf_t);
73 static struct radix_node
74 *rn_insert(void *, struct radix_node_head *, int *,
75 struct radix_node [2]),
76 *rn_newpair(void *, int, struct radix_node[2]),
77 *rn_search(void *, struct radix_node *),
78 *rn_search_m(void *, struct radix_node *, void *),
79 *rn_lookup(void *, void *, struct radix_node_head *),
80 *rn_match(void *, struct radix_node_head *),
81 *rn_match_args(void *, struct radix_node_head *, match_leaf_t *,
82 void *),
83 *rn_addmask(void *, int, int),
84 *rn_addroute(void *, void *, struct radix_node_head *,
85 struct radix_node [2]),
86 *rn_delete(void *, void *, struct radix_node_head *);
87 static boolean_t rn_refines(void *, void *);
90 * IPF also uses PATRICIA tree to manage ippools. IPF stores its own structure
91 * addrfamily_t. sizeof (addrfamily_t) == 24.
93 #define MAX_KEYLEN 24
94 static int max_keylen = MAX_KEYLEN;
96 #ifdef _KERNEL
97 static struct kmem_cache *radix_mask_cache; /* for rn_mkfreelist */
98 static struct kmem_cache *radix_node_cache;
99 #else
100 static char *radix_mask_cache, *radix_node_cache; /* dummy vars. never inited */
101 #endif /* _KERNEL */
103 static struct radix_mask *rn_mkfreelist;
104 static struct radix_node_head *mask_rnhead;
106 * Work area -- the following point to 2 buffers of size max_keylen,
107 * allocated in this order in a block of memory malloc'ed by rn_init.
108 * A third buffer of size MAX_KEYLEN is allocated from the stack.
110 static char *rn_zeros, *rn_ones;
112 #define MKGet(m) R_Malloc(m, radix_mask_cache, sizeof (struct radix_mask))
113 #define MKFree(m) Free(m, radix_mask_cache)
114 #define rn_masktop (mask_rnhead->rnh_treetop)
116 static boolean_t rn_lexobetter(void *m_arg, void *n_arg);
117 static struct radix_mask *
118 rn_new_radix_mask(struct radix_node *tt,
119 struct radix_mask *next);
120 static boolean_t
121 rn_satisfies_leaf(char *trial, struct radix_node *leaf,
122 int skip, match_leaf_t *rn_leaf_fn, void *rn_leaf_arg);
124 #define RN_MATCHF(rn, f, arg) (f == NULL || (*f)((rn), arg))
127 * The data structure for the keys is a radix tree with one way
128 * branching removed. The index rn_bit at an internal node n represents a bit
129 * position to be tested. The tree is arranged so that all descendants
130 * of a node n have keys whose bits all agree up to position rn_bit - 1.
131 * (We say the index of n is rn_bit.)
133 * There is at least one descendant which has a one bit at position rn_bit,
134 * and at least one with a zero there.
136 * A route is determined by a pair of key and mask. We require that the
137 * bit-wise logical and of the key and mask to be the key.
138 * We define the index of a route associated with the mask to be
139 * the first bit number in the mask where 0 occurs (with bit number 0
140 * representing the highest order bit).
142 * We say a mask is normal if every bit is 0, past the index of the mask.
143 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
144 * and m is a normal mask, then the route applies to every descendant of n.
145 * If the index(m) < rn_bit, this implies the trailing last few bits of k
146 * before bit b are all 0, (and hence consequently true of every descendant
147 * of n), so the route applies to all descendants of the node as well.
149 * Similar logic shows that a non-normal mask m such that
150 * index(m) <= index(n) could potentially apply to many children of n.
151 * Thus, for each non-host route, we attach its mask to a list at an internal
152 * node as high in the tree as we can go.
154 * The present version of the code makes use of normal routes in short-
155 * circuiting an explict mask and compare operation when testing whether
156 * a key satisfies a normal route, and also in remembering the unique leaf
157 * that governs a subtree.
161 * Most of the functions in this code assume that the key/mask arguments
162 * are sockaddr-like structures, where the first byte is an uchar_t
163 * indicating the size of the entire structure.
165 * To make the assumption more explicit, we use the LEN() macro to access
166 * this field. It is safe to pass an expression with side effects
167 * to LEN() as the argument is evaluated only once.
169 #define LEN(x) (*(const uchar_t *)(x))
173 * Search a node in the tree matching the key.
175 static struct radix_node *
176 rn_search(v_arg, head)
177 void *v_arg;
178 struct radix_node *head;
180 struct radix_node *x;
181 caddr_t v;
183 for (x = head, v = v_arg; x->rn_bit >= 0; ) {
184 if (x->rn_bmask & v[x->rn_offset])
185 x = x->rn_right;
186 else
187 x = x->rn_left;
189 return (x);
193 * Same as above, but with an additional mask.
195 static struct radix_node *
196 rn_search_m(v_arg, head, m_arg)
197 struct radix_node *head;
198 void *v_arg, *m_arg;
200 struct radix_node *x;
201 caddr_t v = v_arg, m = m_arg;
203 for (x = head; x->rn_bit >= 0; ) {
204 if ((x->rn_bmask & m[x->rn_offset]) &&
205 (x->rn_bmask & v[x->rn_offset]))
206 x = x->rn_right;
207 else
208 x = x->rn_left;
210 return (x);
214 * Returns true if there are no bits set in n_arg that are zero in
215 * m_arg and the masks aren't equal. In other words, it returns true
216 * when m_arg is a finer-granularity netmask -- it represents a subset
217 * of the destinations implied by n_arg.
219 static boolean_t
220 rn_refines(m_arg, n_arg)
221 void *m_arg, *n_arg;
223 caddr_t m = m_arg, n = n_arg;
224 caddr_t lim = n + LEN(n), lim2 = lim;
225 int longer = LEN(n++) - (int)LEN(m++);
226 boolean_t masks_are_equal = B_TRUE;
228 if (longer > 0)
229 lim -= longer;
230 while (n < lim) {
231 if (*n & ~(*m))
232 return (0);
233 if (*n++ != *m++)
234 masks_are_equal = B_FALSE;
236 while (n < lim2)
237 if (*n++)
238 return (B_FALSE);
239 if (masks_are_equal && (longer < 0))
240 for (lim2 = m - longer; m < lim2; )
241 if (*m++)
242 return (B_TRUE);
243 return (!masks_are_equal);
246 static struct radix_node *
247 rn_lookup(v_arg, m_arg, head)
248 void *v_arg, *m_arg;
249 struct radix_node_head *head;
251 struct radix_node *x;
252 caddr_t netmask = NULL;
254 if (m_arg) {
255 x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
256 if (x == NULL)
257 return (NULL);
258 netmask = x->rn_key;
260 x = rn_match(v_arg, head);
261 if (x && netmask) {
262 while (x && x->rn_mask != netmask)
263 x = x->rn_dupedkey;
265 return (x);
269 * Returns true if address 'trial' has no bits differing from the
270 * leaf's key when compared under the leaf's mask. In other words,
271 * returns true when 'trial' matches leaf.
272 * In addition, if a rn_leaf_fn is passed in, that is used to find
273 * a match on conditions defined by the caller of rn_match. This is
274 * used by the kernel ftable to match on IRE_MATCH_* conditions.
276 static boolean_t
277 rn_satisfies_leaf(trial, leaf, skip, rn_leaf_fn, rn_leaf_arg)
278 caddr_t trial;
279 struct radix_node *leaf;
280 int skip;
281 match_leaf_t *rn_leaf_fn;
282 void *rn_leaf_arg;
284 char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
285 char *cplim;
286 int length = min(LEN(cp), LEN(cp2));
288 if (cp3 == 0)
289 cp3 = rn_ones;
290 else
291 length = min(length, LEN(cp3));
292 cplim = cp + length;
293 cp3 += skip;
294 cp2 += skip;
296 for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
297 if ((*cp ^ *cp2) & *cp3)
298 return (B_FALSE);
300 return (RN_MATCHF(leaf, rn_leaf_fn, rn_leaf_arg));
303 static struct radix_node *
304 rn_match(v_arg, head)
305 void *v_arg;
306 struct radix_node_head *head;
308 return (rn_match_args(v_arg, head, NULL, NULL));
311 static struct radix_node *
312 rn_match_args(v_arg, head, rn_leaf_fn, rn_leaf_arg)
313 void *v_arg;
314 struct radix_node_head *head;
315 match_leaf_t *rn_leaf_fn;
316 void *rn_leaf_arg;
318 caddr_t v = v_arg;
319 struct radix_node *t = head->rnh_treetop, *x;
320 caddr_t cp = v, cp2;
321 caddr_t cplim;
322 struct radix_node *saved_t, *top = t;
323 int off = t->rn_offset, vlen = LEN(cp), matched_off;
324 int test, b, rn_bit;
327 * Open code rn_search(v, top) to avoid overhead of extra
328 * subroutine call.
330 for (; t->rn_bit >= 0; ) {
331 if (t->rn_bmask & cp[t->rn_offset])
332 t = t->rn_right;
333 else
334 t = t->rn_left;
337 * See if we match exactly as a host destination
338 * or at least learn how many bits match, for normal mask finesse.
340 * It doesn't hurt us to limit how many bytes to check
341 * to the length of the mask, since if it matches we had a genuine
342 * match and the leaf we have is the most specific one anyway;
343 * if it didn't match with a shorter length it would fail
344 * with a long one. This wins big for class B&C netmasks which
345 * are probably the most common case...
347 if (t->rn_mask)
348 vlen = LEN(t->rn_mask);
349 cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
350 for (; cp < cplim; cp++, cp2++)
351 if (*cp != *cp2)
352 goto keydiff;
354 * This extra grot is in case we are explicitly asked
355 * to look up the default. Ugh!
357 * Never return the root node itself, it seems to cause a
358 * lot of confusion.
360 if (t->rn_flags & RNF_ROOT)
361 t = t->rn_dupedkey;
362 if (t == NULL || RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) {
363 return (t);
364 } else {
366 * Although we found an exact match on the key, rn_leaf_fn
367 * is looking for some other criteria as well. Continue
368 * looking as if the exact match failed.
370 if (t->rn_dupedkey == NULL &&
371 (t->rn_parent->rn_flags & RNF_ROOT)) {
372 /* no more dupedkeys and hit the top. have to give up */
373 return (NULL);
375 b = 0;
376 goto keeplooking;
379 keydiff:
380 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
381 for (b = 7; (test >>= 1) > 0; )
382 b--;
383 keeplooking:
384 matched_off = cp - v;
385 b += matched_off << 3;
386 rn_bit = -1 - b;
389 * If there is a host route in a duped-key chain, it will be first.
391 if ((saved_t = t)->rn_mask == 0)
392 t = t->rn_dupedkey;
393 for (; t != NULL; t = t->rn_dupedkey) {
395 * Even if we don't match exactly as a host,
396 * we may match if the leaf we wound up at is
397 * a route to a net.
400 if (t->rn_flags & RNF_NORMAL) {
401 if ((rn_bit <= t->rn_bit) &&
402 RN_MATCHF(t, rn_leaf_fn, rn_leaf_arg)) {
403 return (t);
405 } else if (rn_satisfies_leaf(v, t, matched_off, rn_leaf_fn,
406 rn_leaf_arg)) {
407 return (t);
410 t = saved_t;
411 /* start searching up the tree */
412 do {
413 struct radix_mask *m;
415 t = t->rn_parent;
416 m = t->rn_mklist;
418 * If non-contiguous masks ever become important
419 * we can restore the masking and open coding of
420 * the search and satisfaction test and put the
421 * calculation of "off" back before the "do".
423 while (m) {
424 if (m->rm_flags & RNF_NORMAL) {
425 if ((rn_bit <= m->rm_bit) &&
426 RN_MATCHF(m->rm_leaf, rn_leaf_fn,
427 rn_leaf_arg)) {
428 return (m->rm_leaf);
430 } else {
431 off = min(t->rn_offset, matched_off);
432 x = rn_search_m(v, t, m->rm_mask);
433 while (x != NULL && x->rn_mask != m->rm_mask)
434 x = x->rn_dupedkey;
435 if (x && rn_satisfies_leaf(v, x, off,
436 rn_leaf_fn, rn_leaf_arg)) {
437 return (x);
440 m = m->rm_mklist;
442 } while (t != top);
443 return (0);
447 * Whenever we add a new leaf to the tree, we also add a parent node,
448 * so we allocate them as an array of two elements: the first one must be
449 * the leaf (see RNTORT() in route.c), the second one is the parent.
450 * This routine initializes the relevant fields of the nodes, so that
451 * the leaf is the left child of the parent node, and both nodes have
452 * (almost) all all fields filled as appropriate.
453 * The function returns a pointer to the parent node.
456 static struct radix_node *
457 rn_newpair(v, b, nodes)
458 void *v;
459 int b;
460 struct radix_node nodes[2];
462 struct radix_node *tt = nodes, *t = tt + 1;
464 t->rn_bit = b;
465 t->rn_bmask = 0x80 >> (b & 7);
466 t->rn_left = tt;
467 t->rn_offset = b >> 3;
470 * t->rn_parent, r->rn_right, tt->rn_mask, tt->rn_dupedkey
471 * and tt->rn_bmask must have been zeroed by caller.
473 tt->rn_bit = -1;
474 tt->rn_key = v;
475 tt->rn_parent = t;
476 tt->rn_flags = t->rn_flags = RNF_ACTIVE;
477 tt->rn_mklist = t->rn_mklist = 0;
478 return (t);
481 static struct radix_node *
482 rn_insert(v_arg, head, dupentry, nodes)
483 void *v_arg;
484 struct radix_node_head *head;
485 int *dupentry;
486 struct radix_node nodes[2];
488 caddr_t v = v_arg;
489 struct radix_node *top = head->rnh_treetop;
490 struct radix_node *p, *x;
491 int head_off = top->rn_offset, vlen = (int)LEN(v);
492 struct radix_node *t = rn_search(v_arg, top);
493 caddr_t cp = v + head_off;
494 int b;
495 struct radix_node *tt;
496 caddr_t cp2 = t->rn_key + head_off;
497 int cmp_res;
498 caddr_t cplim = v + vlen;
501 * Find first bit at which v and t->rn_key differ
503 while (cp < cplim)
504 if (*cp2++ != *cp++)
505 goto on1;
506 *dupentry = 1;
507 return (t);
508 on1:
509 *dupentry = 0;
510 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
512 * (cp - v) is the number of bytes where the match is relevant.
513 * Multiply by 8 to get number of bits. Then reduce this number
514 * by the trailing bits in the last byte where we have a match
515 * by looking at (cmp_res >> 1) in each iteration below.
516 * Note that v starts at the beginning of the key, so, when key
517 * is a sockaddr structure, the preliminary len/family/port bytes
518 * are accounted for.
520 for (b = (cp - v) << 3; cmp_res; b--)
521 cmp_res >>= 1;
522 cp = v;
523 x = top;
524 do {
525 p = x;
526 if (cp[x->rn_offset] & x->rn_bmask)
527 x = x->rn_right;
528 else
529 x = x->rn_left;
530 } while (b > (unsigned)x->rn_bit);
531 /* x->rn_bit < b && x->rn_bit >= 0 */
533 * now the rightmost bit where v and rn_key differ (b) is <
534 * x->rn_bit.
536 * We will add a new branch at p. b cannot equal x->rn_bit
537 * because we know we didn't find a duplicated key.
538 * The tree will be re-adjusted so that t is inserted between p
539 * and x.
541 t = rn_newpair(v_arg, b, nodes);
542 tt = t->rn_left;
543 if ((cp[p->rn_offset] & p->rn_bmask) == 0)
544 p->rn_left = t;
545 else
546 p->rn_right = t;
547 x->rn_parent = t;
548 t->rn_parent = p;
549 if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
550 t->rn_right = x;
551 } else {
552 t->rn_right = tt;
553 t->rn_left = x;
555 return (tt);
558 static struct radix_node *
559 rn_addmask(n_arg, search, skip)
560 int search, skip;
561 void *n_arg;
563 caddr_t netmask = (caddr_t)n_arg;
564 struct radix_node *x;
565 caddr_t cp, cplim;
566 int b = 0, mlen, j;
567 int maskduplicated, m0, isnormal;
568 struct radix_node *saved_x;
569 int last_zeroed = 0;
570 char addmask_key[MAX_KEYLEN];
572 if ((mlen = LEN(netmask)) > max_keylen)
573 mlen = max_keylen;
574 if (skip == 0)
575 skip = 1;
576 if (mlen <= skip)
577 return (mask_rnhead->rnh_nodes);
578 if (skip > 1)
579 bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
580 if ((m0 = mlen) > skip)
581 bcopy(netmask + skip, addmask_key + skip, mlen - skip);
583 * Trim trailing zeroes.
585 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0; )
586 cp--;
587 mlen = cp - addmask_key;
588 if (mlen <= skip) {
589 if (m0 >= last_zeroed)
590 last_zeroed = mlen;
591 return (mask_rnhead->rnh_nodes);
593 if (m0 < last_zeroed)
594 bzero(addmask_key + m0, last_zeroed - m0);
595 *addmask_key = last_zeroed = mlen;
596 x = rn_search(addmask_key, rn_masktop);
597 if (bcmp(addmask_key, x->rn_key, mlen) != 0)
598 x = 0;
599 if (x || search)
600 return (x);
601 R_Zalloc(x, radix_node_cache, max_keylen + 2 * sizeof (*x));
603 if ((saved_x = x) == 0)
604 return (0);
605 netmask = cp = (caddr_t)(x + 2);
606 bcopy(addmask_key, cp, mlen);
607 x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
608 if (maskduplicated) {
609 #ifdef _KERNEL
610 cmn_err(CE_WARN, "rn_addmask: mask impossibly already in tree");
611 #else
612 syslog(LOG_ERR, "rn_addmask: mask impossibly already in tree");
613 #endif /* _KERNEL */
614 Free(saved_x, radix_node_cache);
615 return (x);
618 * Calculate index of mask, and check for normalcy.
619 * First find the first byte with a 0 bit, then if there are
620 * more bits left (remember we already trimmed the trailing 0's),
621 * the pattern must be one of those in normal_chars[], or we have
622 * a non-contiguous mask.
624 cplim = netmask + mlen;
625 isnormal = 1;
626 for (cp = netmask + skip; (cp < cplim) && *(uchar_t *)cp == 0xff; )
627 cp++;
628 if (cp != cplim) {
629 static uint8_t normal_chars[] = {
630 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
632 for (j = 0x80; (j & *cp) != 0; j >>= 1)
633 b++;
634 if (*cp != normal_chars[b] || cp != (cplim - 1))
635 isnormal = 0;
637 b += (cp - netmask) << 3;
638 x->rn_bit = -1 - b;
639 if (isnormal)
640 x->rn_flags |= RNF_NORMAL;
641 return (x);
644 /* arbitrary ordering for non-contiguous masks */
645 static boolean_t
646 rn_lexobetter(m_arg, n_arg)
647 void *m_arg, *n_arg;
649 uchar_t *mp = m_arg, *np = n_arg, *lim;
651 if (LEN(mp) > LEN(np))
652 /* not really, but need to check longer one first */
653 return (B_TRUE);
654 if (LEN(mp) == LEN(np))
655 for (lim = mp + LEN(mp); mp < lim; )
656 if (*mp++ > *np++)
657 return (B_TRUE);
658 return (B_FALSE);
661 static struct radix_mask *
662 rn_new_radix_mask(tt, next)
663 struct radix_node *tt;
664 struct radix_mask *next;
666 struct radix_mask *m;
668 MKGet(m);
669 if (m == 0) {
670 #ifndef _KERNEL
671 syslog(LOG_ERR, "Mask for route not entered\n");
672 #endif /* _KERNEL */
673 return (0);
675 bzero(m, sizeof (*m));
676 m->rm_bit = tt->rn_bit;
677 m->rm_flags = tt->rn_flags;
678 if (tt->rn_flags & RNF_NORMAL)
679 m->rm_leaf = tt;
680 else
681 m->rm_mask = tt->rn_mask;
682 m->rm_mklist = next;
683 tt->rn_mklist = m;
684 return (m);
687 static struct radix_node *
688 rn_addroute(v_arg, n_arg, head, treenodes)
689 void *v_arg, *n_arg;
690 struct radix_node_head *head;
691 struct radix_node treenodes[2];
693 caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
694 struct radix_node *t, *x = 0, *tt;
695 struct radix_node *saved_tt, *top = head->rnh_treetop;
696 short b = 0, b_leaf = 0;
697 int keyduplicated;
698 caddr_t mmask;
699 struct radix_mask *m, **mp;
702 * In dealing with non-contiguous masks, there may be
703 * many different routes which have the same mask.
704 * We will find it useful to have a unique pointer to
705 * the mask to speed avoiding duplicate references at
706 * nodes and possibly save time in calculating indices.
708 if (netmask) {
709 if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
710 return (0);
711 b_leaf = x->rn_bit;
712 b = -1 - x->rn_bit;
713 netmask = x->rn_key;
716 * Deal with duplicated keys: attach node to previous instance
718 saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
719 if (keyduplicated) {
720 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
721 if (tt->rn_mask == netmask)
722 return (0);
723 if (netmask == 0 ||
724 (tt->rn_mask &&
725 /* index (netmask) > node */
726 ((b_leaf < tt->rn_bit) ||
727 rn_refines(netmask, tt->rn_mask) ||
728 rn_lexobetter(netmask, tt->rn_mask))))
729 break;
732 * If the mask is not duplicated, we wouldn't
733 * find it among possible duplicate key entries
734 * anyway, so the above test doesn't hurt.
736 * Insert treenodes before tt.
738 * We sort the masks for a duplicated key the same way as
739 * in a masklist -- most specific to least specific.
740 * This may require the unfortunate nuisance of relocating
741 * the head of the list.
743 * We also reverse, or doubly link the list through the
744 * parent pointer.
746 if (tt == saved_tt) {
747 struct radix_node *xx = x;
748 /* link in at head of list */
749 (tt = treenodes)->rn_dupedkey = t;
750 tt->rn_flags = t->rn_flags;
751 tt->rn_parent = x = t->rn_parent;
752 t->rn_parent = tt; /* parent */
753 if (x->rn_left == t)
754 x->rn_left = tt;
755 else
756 x->rn_right = tt;
757 saved_tt = tt; x = xx;
758 } else {
759 (tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
760 t->rn_dupedkey = tt;
761 /* Set rn_parent value for tt and tt->rn_dupedkey */
762 tt->rn_parent = t;
763 if (tt->rn_dupedkey)
764 tt->rn_dupedkey->rn_parent = tt;
766 tt->rn_key = v;
767 tt->rn_bit = -1;
768 tt->rn_flags = RNF_ACTIVE;
771 * Put mask in tree.
773 if (netmask) {
774 tt->rn_mask = netmask;
775 tt->rn_bit = x->rn_bit;
776 tt->rn_flags |= x->rn_flags & RNF_NORMAL;
778 /* BEGIN CSTYLED */
780 * at this point the parent-child relationship for p, t, x, tt is
781 * one of the following:
782 * p p
783 * : (left/right child) :
784 * : :
785 * t t
786 * / \ / \
787 * x tt tt x
789 * tt == saved_tt returned by rn_insert().
791 /* END CSTYLED */
792 t = saved_tt->rn_parent;
793 if (keyduplicated)
794 goto key_exists;
795 b_leaf = -1 - t->rn_bit;
797 * b_leaf is now normalized to be in the leaf rn_bit format
798 * (it is the rn_bit value of a leaf corresponding to netmask
799 * of t->rn_bit).
801 if (t->rn_right == saved_tt)
802 x = t->rn_left;
803 else
804 x = t->rn_right;
806 * Promote general routes from below.
807 * Identify the less specific netmasks and add them to t->rm_mklist
809 if (x->rn_bit < 0) {
810 /* x is the sibling node. it is a leaf node. */
811 for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
812 if (x->rn_mask && (x->rn_bit >= b_leaf) &&
813 x->rn_mklist == 0) {
815 * x is the first node in the dupedkey chain
816 * without a mklist, and with a shorter mask
817 * than b_leaf. Create a radix_mask
818 * corresponding to x's mask and add it to
819 * t's rn_mklist. The mask list gets created
820 * in decreasing order of mask length.
822 *mp = m = rn_new_radix_mask(x, 0);
823 if (m)
824 mp = &m->rm_mklist;
826 } else if (x->rn_mklist) {
828 * Skip over masks whose index is > that of new node
830 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
831 if (m->rm_bit >= b_leaf)
832 break;
833 t->rn_mklist = m; *mp = 0;
835 key_exists:
836 /* Add new route to highest possible ancestor's list */
837 if ((netmask == 0) || (b > t->rn_bit))
838 return (tt); /* can't lift at all */
839 b_leaf = tt->rn_bit;
840 /* b is the index of the netmask */
841 do {
842 x = t;
843 t = t->rn_parent;
844 } while (b <= t->rn_bit && x != top);
846 * Search through routes associated with node to
847 * insert new route according to index.
848 * Need same criteria as when sorting dupedkeys to avoid
849 * double loop on deletion.
851 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
852 if (m->rm_bit < b_leaf)
853 continue;
854 if (m->rm_bit > b_leaf)
855 break;
856 if (m->rm_flags & RNF_NORMAL) {
857 mmask = m->rm_leaf->rn_mask;
858 if (tt->rn_flags & RNF_NORMAL) {
859 #ifdef _KERNEL
860 cmn_err(CE_WARN, "Non-unique normal route, "
861 "mask not entered\n");
862 #else
863 syslog(LOG_ERR, "Non-unique normal route, "
864 "mask not entered\n");
865 #endif /* _KERNEL */
866 return (tt);
868 } else
869 mmask = m->rm_mask;
870 if (mmask == netmask) {
871 m->rm_refs++;
872 tt->rn_mklist = m;
873 return (tt);
875 if (rn_refines(netmask, mmask) ||
876 rn_lexobetter(netmask, mmask))
877 break;
879 *mp = rn_new_radix_mask(tt, *mp);
880 return (tt);
883 static struct radix_node *
884 rn_delete(v_arg, netmask_arg, head)
885 void *v_arg, *netmask_arg;
886 struct radix_node_head *head;
888 struct radix_node *t, *p, *x, *tt;
889 struct radix_mask *m, *saved_m, **mp;
890 struct radix_node *dupedkey, *saved_tt, *top;
891 caddr_t v, netmask;
892 int b, head_off, vlen;
894 v = v_arg;
895 netmask = netmask_arg;
896 x = head->rnh_treetop;
897 tt = rn_search(v, x);
898 head_off = x->rn_offset;
899 vlen = LEN(v);
900 saved_tt = tt;
901 top = x;
902 if (tt == 0 ||
903 bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
904 return (0);
906 * Delete our route from mask lists.
908 if (netmask) {
909 if ((x = rn_addmask(netmask, 1, head_off)) == 0)
910 return (0);
911 netmask = x->rn_key;
912 while (tt->rn_mask != netmask)
913 if ((tt = tt->rn_dupedkey) == 0)
914 return (0);
916 if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
917 goto on1;
918 if (tt->rn_flags & RNF_NORMAL) {
919 if (m->rm_leaf != tt || m->rm_refs > 0) {
920 #ifdef _KERNEL
921 cmn_err(CE_WARN,
922 "rn_delete: inconsistent annotation\n");
923 #else
924 syslog(LOG_ERR, "rn_delete: inconsistent annotation\n");
925 #endif /* _KERNEL */
926 return (0); /* dangling ref could cause disaster */
928 } else {
929 if (m->rm_mask != tt->rn_mask) {
930 #ifdef _KERNEL
931 cmn_err(CE_WARN,
932 "rn_delete: inconsistent annotation 2\n");
933 #else
934 syslog(LOG_ERR,
935 "rn_delete: inconsistent annotation 2\n");
936 #endif /* _KERNEL */
937 goto on1;
939 if (--m->rm_refs >= 0)
940 goto on1;
942 b = -1 - tt->rn_bit;
943 t = saved_tt->rn_parent;
944 if (b > t->rn_bit)
945 goto on1; /* Wasn't lifted at all */
946 do {
947 x = t;
948 t = t->rn_parent;
949 } while (b <= t->rn_bit && x != top);
950 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
951 if (m == saved_m) {
952 *mp = m->rm_mklist;
953 MKFree(m);
954 break;
956 if (m == 0) {
957 #ifdef _KERNEL
958 cmn_err(CE_WARN, "rn_delete: couldn't find our annotation\n");
959 #else
960 syslog(LOG_ERR, "rn_delete: couldn't find our annotation\n");
961 #endif /* _KERNEL */
962 if (tt->rn_flags & RNF_NORMAL)
963 return (0); /* Dangling ref to us */
965 on1:
967 * Eliminate us from tree
969 if (tt->rn_flags & RNF_ROOT)
970 return (0);
971 t = tt->rn_parent;
972 dupedkey = saved_tt->rn_dupedkey;
973 if (dupedkey) {
975 * Here, tt is the deletion target and
976 * saved_tt is the head of the dupekey chain.
978 if (tt == saved_tt) {
979 /* remove from head of chain */
980 x = dupedkey; x->rn_parent = t;
981 if (t->rn_left == tt)
982 t->rn_left = x;
983 else
984 t->rn_right = x;
985 } else {
986 /* find node in front of tt on the chain */
987 for (x = p = saved_tt; p && p->rn_dupedkey != tt; )
988 p = p->rn_dupedkey;
989 if (p) {
990 p->rn_dupedkey = tt->rn_dupedkey;
991 if (tt->rn_dupedkey) /* parent */
992 tt->rn_dupedkey->rn_parent = p;
993 /* parent */
994 } else
995 #ifdef _KERNEL
996 cmn_err(CE_WARN,
997 "rn_delete: couldn't find us\n");
998 #else
999 syslog(LOG_ERR,
1000 "rn_delete: couldn't find us\n");
1001 #endif /* _KERNEL */
1003 t = tt + 1;
1004 if (t->rn_flags & RNF_ACTIVE) {
1005 *++x = *t;
1006 p = t->rn_parent;
1007 if (p->rn_left == t)
1008 p->rn_left = x;
1009 else
1010 p->rn_right = x;
1011 x->rn_left->rn_parent = x;
1012 x->rn_right->rn_parent = x;
1014 goto out;
1016 if (t->rn_left == tt)
1017 x = t->rn_right;
1018 else
1019 x = t->rn_left;
1020 p = t->rn_parent;
1021 if (p->rn_right == t)
1022 p->rn_right = x;
1023 else
1024 p->rn_left = x;
1025 x->rn_parent = p;
1027 * Demote routes attached to us.
1029 if (t->rn_mklist) {
1030 if (x->rn_bit >= 0) {
1031 for (mp = &x->rn_mklist; (m = *mp) != NULL; )
1032 mp = &m->rm_mklist;
1033 *mp = t->rn_mklist;
1034 } else {
1036 * If there are any key,mask pairs in a sibling
1037 * duped-key chain, some subset will appear sorted
1038 * in the same order attached to our mklist
1040 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
1041 if (m == x->rn_mklist) {
1042 struct radix_mask *mm = m->rm_mklist;
1043 x->rn_mklist = 0;
1044 if (--(m->rm_refs) < 0)
1045 MKFree(m);
1046 m = mm;
1048 if (m)
1049 #ifdef _KERNEL
1050 cmn_err(CE_WARN,
1051 "rn_delete: Orphaned Mask %p at %p\n",
1052 (void *)m, (void *)x);
1053 #else
1054 syslog(LOG_ERR,
1055 "rn_delete: Orphaned Mask %p at %p\n",
1056 (void *)m, (void *)x);
1057 #endif /* _KERNEL */
1061 * We may be holding an active internal node in the tree.
1063 x = tt + 1;
1064 if (t != x) {
1065 *t = *x;
1066 t->rn_left->rn_parent = t;
1067 t->rn_right->rn_parent = t;
1068 p = x->rn_parent;
1069 if (p->rn_left == x)
1070 p->rn_left = t;
1071 else
1072 p->rn_right = t;
1074 out:
1075 tt->rn_flags &= ~RNF_ACTIVE;
1076 tt[1].rn_flags &= ~RNF_ACTIVE;
1077 return (tt);
1081 * Walk the radix tree; For the kernel routing table, we hold additional
1082 * refs on the ire_bucket to ensure that the walk function f() does not
1083 * run into trashed memory. The kernel routing table is identified by
1084 * a rnh_treetop that has RNF_SUNW_FT set in the rn_flags.
1085 * Note that all refs takein in rn_walktree are released before it returns,
1086 * so that f() will need to take any additional references on memory
1087 * to be passed back to the caller of rn_walktree.
1089 static int
1090 rn_walktree(h, f, w)
1091 struct radix_node_head *h;
1092 walktree_f_t *f;
1093 void *w;
1095 return (rn_walktree_mt(h, f, w, NULL, NULL));
1097 static int
1098 rn_walktree_mt(h, f, w, lockf, unlockf)
1099 struct radix_node_head *h;
1100 walktree_f_t *f;
1101 void *w;
1102 lockf_t lockf, unlockf;
1104 int error;
1105 struct radix_node *base, *next;
1106 struct radix_node *rn = h->rnh_treetop;
1107 boolean_t is_mt = B_FALSE;
1109 if (lockf != NULL) {
1110 ASSERT(unlockf != NULL);
1111 is_mt = B_TRUE;
1114 * This gets complicated because we may delete the node
1115 * while applying the function f to it, so we need to calculate
1116 * the successor node in advance.
1118 RADIX_NODE_HEAD_RLOCK(h);
1119 /* First time through node, go left */
1120 while (rn->rn_bit >= 0) {
1121 rn = rn->rn_left;
1124 if (is_mt)
1125 (*lockf)(rn);
1127 for (;;) {
1128 base = rn;
1129 /* If at right child go back up, otherwise, go right */
1130 while (rn->rn_parent->rn_right == rn &&
1131 (rn->rn_flags & RNF_ROOT) == 0) {
1132 rn = rn->rn_parent;
1134 /* Find the next *leaf* since next node might vanish, too */
1135 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0; ) {
1136 rn = rn->rn_left;
1138 next = rn;
1140 if (is_mt && next != NULL)
1141 (*lockf)(next);
1143 /* Process leaves */
1144 while ((rn = base) != NULL) {
1145 base = rn->rn_dupedkey;
1147 if (is_mt && base != NULL)
1148 (*lockf)(base);
1150 RADIX_NODE_HEAD_UNLOCK(h);
1151 if (!(rn->rn_flags & RNF_ROOT) &&
1152 (error = (*f)(rn, w))) {
1153 if (is_mt) {
1154 (*unlockf)(rn);
1155 if (base != NULL)
1156 (*unlockf)(base);
1157 if (next != NULL)
1158 (*unlockf)(next);
1160 return (error);
1162 if (is_mt)
1163 (*unlockf)(rn);
1164 RADIX_NODE_HEAD_RLOCK(h);
1166 rn = next;
1167 if (rn->rn_flags & RNF_ROOT) {
1168 RADIX_NODE_HEAD_UNLOCK(h);
1170 * no ref to release, since we never take a ref
1171 * on the root node- it can't be deleted.
1173 return (0);
1176 /* NOTREACHED */
1180 * Allocate and initialize an empty tree. This has 3 nodes, which are
1181 * part of the radix_node_head (in the order <left,root,right>) and are
1182 * marked RNF_ROOT so they cannot be freed.
1183 * The leaves have all-zero and all-one keys, with significant
1184 * bits starting at 'off'.
1185 * Return 1 on success, 0 on error.
1188 rn_inithead(head, off)
1189 void **head;
1190 int off;
1192 struct radix_node_head *rnh;
1193 struct radix_node *t, *tt, *ttt;
1194 if (*head)
1195 return (1);
1196 R_ZallocSleep(rnh, struct radix_node_head *, sizeof (*rnh));
1197 if (rnh == 0)
1198 return (0);
1199 #ifdef _KERNEL
1200 RADIX_NODE_HEAD_LOCK_INIT(rnh);
1201 #endif
1202 *head = rnh;
1203 t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
1204 ttt = rnh->rnh_nodes + 2;
1205 t->rn_right = ttt;
1206 t->rn_parent = t;
1207 tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */
1208 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
1209 tt->rn_bit = -1 - off;
1210 *ttt = *tt;
1211 ttt->rn_key = rn_ones;
1212 rnh->rnh_addaddr = rn_addroute;
1213 rnh->rnh_deladdr = rn_delete;
1214 rnh->rnh_matchaddr = rn_match;
1215 rnh->rnh_matchaddr_args = rn_match_args;
1216 rnh->rnh_lookup = rn_lookup;
1217 rnh->rnh_walktree = rn_walktree;
1218 rnh->rnh_walktree_mt = rn_walktree_mt;
1219 rnh->rnh_walktree_from = NULL; /* not implemented */
1220 rnh->rnh_treetop = t;
1221 return (1);
1224 void
1225 rn_init()
1227 char *cp, *cplim;
1229 #ifdef _KERNEL
1230 radix_mask_cache = kmem_cache_create("radix_mask",
1231 sizeof (struct radix_mask), 0, NULL, NULL, NULL, NULL, NULL, 0);
1232 radix_node_cache = kmem_cache_create("radix_node",
1233 max_keylen + 2 * sizeof (struct radix_node),
1234 0, NULL, NULL, NULL, NULL, NULL, 0);
1235 #endif /* _KERNEL */
1236 R_ZallocSleep(rn_zeros, char *, 2 * max_keylen);
1238 ASSERT(rn_zeros != NULL);
1239 bzero(rn_zeros, 2 * max_keylen);
1240 rn_ones = cp = rn_zeros + max_keylen;
1241 cplim = rn_ones + max_keylen;
1242 while (cp < cplim)
1243 *cp++ = -1;
1244 if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
1245 panic("rn_init: could not init mask_rnhead ");
1249 rn_freenode(n, p)
1250 struct radix_node *n;
1251 void *p;
1253 struct radix_node_head *rnh = p;
1254 struct radix_node *d;
1256 d = rnh->rnh_deladdr(n->rn_key, NULL, rnh);
1257 if (d != NULL) {
1258 Free(d, radix_node_cache);
1260 return (0);
1264 void
1265 rn_freehead(rnh)
1266 struct radix_node_head *rnh;
1268 (void) rn_walktree(rnh, rn_freenode, rnh);
1270 rnh->rnh_addaddr = NULL;
1271 rnh->rnh_deladdr = NULL;
1272 rnh->rnh_matchaddr = NULL;
1273 rnh->rnh_lookup = NULL;
1274 rnh->rnh_walktree = NULL;
1276 #ifdef _KERNEL
1277 RADIX_NODE_HEAD_DESTROY(rnh);
1278 FreeHead(rnh, sizeof (*rnh));
1279 #else
1280 Free(rnh, NULL);
1281 #endif /* _KERNEL */
1284 void
1285 rn_fini()
1287 struct radix_mask *m;
1289 if (rn_zeros != NULL) {
1290 #ifdef _KERNEL
1291 FreeHead(rn_zeros, 2 * max_keylen);
1292 #else
1293 Free(rn_zeros, NULL);
1294 #endif
1295 rn_zeros = NULL;
1299 if (mask_rnhead != NULL) {
1300 rn_freehead(mask_rnhead);
1301 mask_rnhead = NULL;
1304 while ((m = rn_mkfreelist) != NULL) {
1305 rn_mkfreelist = m->rm_mklist;
1306 Free(m, NULL);