Sync with cat.c from netbsd-8
[minix3.git] / minix / net / lwip / rttree.c
blob73e22495ab932933f05dc2391bbcd562da295899
1 /* LWIP service - rttree.c - generic routing tree data structure */
2 /*
3 * This module implements the Net/3 binary radix (Patricia) tree as described
4 * in TCP/IP Illustrated Vol.2, with a few important changes. First and
5 * foremost, we make the assumption that all address masks are "normal", i.e.,
6 * they can be expressed in terms of a "prefix length" or "bit count", meaning
7 * that the first so many bits of the mask are set and the remaining bits are
8 * all clear. Based on this assumption, we store routing entries not just in
9 * leaf nodes, but rather in a node at the bit count of the routing entry's
10 * mask; this node may then also have children. As a result, instead of "leaf"
11 * and "internal" nodes, this module instead uses "data" and "link" nodes:
13 * - Data nodes are nodes with an associated routing entry. The data node
14 * structure is always the first field of its corresponding routing entry
15 * structure. Data nodes may have zero, one, or two children. Its children
16 * are always a refinement of the address mask in the routing entry.
17 * - Link nodes are nodes with no associated routing entry. They always have
18 * exactly two children. As with BSD's "internal" nodes: since the tree
19 * needs no more than one link node per routing entry, each routing entry
20 * structure contains a link node, which may be used anywhere in the tree.
22 * The result of this approach is that we do not use a linked list for each
23 * leaf, since entries with the same address and different masks are not stored
24 * as part of the same leaf node. There is however still one case where a
25 * linked list would be necessary: the coexistence of a full-mask network entry
26 * and a host entry (net/32 vs host for IPv4, net/128 vs host for IPv6). Since
27 * this tree implementation is not used for ARP/ND6 (host) entries, the need to
28 * support that case is not as high, and so it is currently not supported. It
29 * can be added later if needed. In that case, the prototype of only
30 * rttree_find_exact() will have to be changed, since rttree_add() already
31 * supports the difference by passing a full mask vs passing no mask at all.
33 * There are other differences with the BSD implementation, and certainly also
34 * more opportunities for improving performance. For now, the implementation
35 * should be good enough for its intended purpose.
38 #include "lwip.h"
39 #include "rttree.h"
41 #define RTTREE_BITS_TO_BYTE(bits) ((bits) >> 3)
42 #define RTTREE_BITS_TO_SHIFT(bits) (7 - ((bits) & 7))
43 #define RTTREE_BITS_TO_BYTES(bits) (RTTREE_BITS_TO_BYTE((bits) + 7))
46 * The given node is being added to the given routing tree, and just had its
47 * bit count assigned. Precompute any additional fields used for fast address
48 * access on the node.
50 static void
51 rttree_precompute(struct rttree * tree __unused, struct rttree_node * node)
54 node->rtn_byte = RTTREE_BITS_TO_BYTE(node->rtn_bits);
55 node->rtn_shift = RTTREE_BITS_TO_SHIFT(node->rtn_bits);
59 * For an operation on the routing tree 'tree', test whether the bit 'bit' is
60 * set or clear in 'addr'. Return 1 if the address has the bit set, 0 if it
61 * does not.
63 static unsigned int
64 rttree_test(const struct rttree * tree __unused, const void * addr,
65 unsigned int bit)
67 unsigned int byte, shift;
69 byte = RTTREE_BITS_TO_BYTE(bit);
70 shift = RTTREE_BITS_TO_SHIFT(bit);
72 return (((const uint8_t *)addr)[byte] >> shift) & 1;
76 * For an operation on the routing tree 'tree', test whether a particular bit
77 * as identified by the routing node 'node' is set or clear in 'address',
78 * effectively computing the side (left or right) to take when descending down
79 * the tree. Return 1 if the address has the bit set, 0 if it does not.
81 static inline unsigned int
82 rttree_side(const struct rttree * tree, const struct rttree_node * node,
83 const void * addr)
86 return (((const uint8_t *)addr)[node->rtn_byte] >>
87 node->rtn_shift) & 1;
91 * Check for the routing tree 'tree' whether the routing entry 'entry' matches
92 * the address 'addr' exactly. Return TRUE or FALSE depending on the outcome.
93 * This function must be called only on entries that have already been
94 * determined to span the full bit width.
96 static inline int
97 rttree_equals(const struct rttree * tree, const struct rttree_entry * entry,
98 const void * addr)
100 unsigned int bits;
102 bits = tree->rtt_bits;
104 assert(bits == entry->rte_data.rtn_bits);
106 return !memcmp(entry->rte_addr, addr, RTTREE_BITS_TO_BYTE(bits));
110 * Check for the routing tree 'tree' whether the routing entry 'entry' matches
111 * the address 'addr'. Return TRUE if the address is matched by the entry's
112 * address and mask, or FALSE if not.
114 static inline int
115 rttree_match(const struct rttree * tree, const struct rttree_entry * entry,
116 const void * addr)
118 const uint8_t *aptr, *aptr2, *mptr;
119 unsigned int bits, bytes;
121 if ((bits = entry->rte_data.rtn_bits) == 0)
122 return TRUE;
124 if ((mptr = (const uint8_t *)entry->rte_mask) == NULL)
125 return rttree_equals(tree, entry, addr);
127 aptr = (const uint8_t *)addr;
128 aptr2 = (const uint8_t *)entry->rte_addr;
130 for (bytes = RTTREE_BITS_TO_BYTES(bits); bytes > 0; bytes--) {
131 if ((*aptr & *mptr) != *aptr2)
132 return FALSE;
134 aptr++;
135 aptr2++;
136 mptr++;
139 return TRUE;
143 * Find the first bit that differs between the two given addresses. Return the
144 * bit number if found, or the full bit width if the addresses are equal.
146 static unsigned int
147 rttree_diff(const struct rttree * tree, const void * addr, const void * addr2)
149 const uint8_t *aptr, *aptr2;
150 unsigned int bit, i;
151 uint8_t b;
153 aptr = (const uint8_t *)addr;
154 aptr2 = (const uint8_t *)addr2;
156 for (bit = 0; bit < tree->rtt_bits; bit += NBBY, aptr++, aptr2++) {
157 if ((b = *aptr ^ *aptr2) != 0) {
158 for (i = 0; i < NBBY; i++)
159 if (b & (1 << (NBBY - i - 1)))
160 break;
161 return bit + i;
165 return bit;
169 * Add a link node to the free list of the given routing tree, marking it as
170 * free in the process.
172 static void
173 rttree_add_free(struct rttree * tree, struct rttree_node * node)
176 node->rtn_child[0] = NULL;
177 if ((node->rtn_child[1] = tree->rtt_free) != NULL)
178 node->rtn_child[1]->rtn_child[0] = node;
179 tree->rtt_free = node;
180 node->rtn_parent = NULL;
181 node->rtn_type = RTNT_FREE;
185 * Remove the given free link node from the free list. The caller must already
186 * have verified that the node is on the free list, and has to change the node
187 * type as appropriate afterward.
189 static void
190 rttree_del_free(struct rttree * tree, struct rttree_node * node)
193 assert(node->rtn_type == RTNT_FREE);
195 if (node->rtn_child[0] != NULL)
196 node->rtn_child[0]->rtn_child[1] = node->rtn_child[1];
197 else
198 tree->rtt_free = node->rtn_child[1];
199 if (node->rtn_child[1] != NULL)
200 node->rtn_child[1]->rtn_child[0] = node->rtn_child[0];
204 * Obtain, remove, and return a free link node from the free list. This
205 * function must be called only when it is already known that the free list is
206 * not empty. The caller has to change the node type as appropriate afterward.
208 static struct rttree_node *
209 rttree_get_free(struct rttree * tree)
211 struct rttree_node * node;
213 node = tree->rtt_free;
214 assert(node != NULL);
215 assert(node->rtn_type == RTNT_FREE);
217 rttree_del_free(tree, node);
219 return node;
223 * Initialize the given routing tree, with the given address bit width.
225 void
226 rttree_init(struct rttree * tree, unsigned int bits)
229 tree->rtt_root = NULL;
230 tree->rtt_free = NULL;
231 tree->rtt_bits = bits;
235 * Look up the most narrow routing tree entry that matches the given address.
236 * Return the entry on success, or NULL if no matching entry is found.
238 struct rttree_entry *
239 rttree_lookup_match(struct rttree * tree, const void * addr)
241 struct rttree_entry *entry, *best;
242 struct rttree_node *node;
243 unsigned int side;
246 * The current implementation is "forward-tracking", testing all
247 * potentially matching entries while descending into the tree and
248 * remembering the "best" (narrowest matching) entry. The assumption
249 * here is that most lookups will end up returning the default route or
250 * another broad route, and thus quickly fail a narrower match and bail
251 * out early. This assumption is in part motivated by the fact that
252 * our routing trees do not store link-layer (ARP/ND6) entries. If
253 * desired, the implementation can easily be rewritten to do
254 * backtracking instead.
256 best = NULL;
258 for (node = tree->rtt_root; node != NULL;
259 node = node->rtn_child[side]) {
260 if (node->rtn_type == RTNT_DATA) {
261 entry = (struct rttree_entry *)node;
263 if (!rttree_match(tree, entry, addr))
264 break;
266 best = entry;
269 side = rttree_side(tree, node, addr);
272 return best;
276 * Look up a routing entry that is an exact match for the given (full) address.
277 * Return the entry if it was found, or NULL otherwise.
279 struct rttree_entry *
280 rttree_lookup_host(struct rttree * tree, const void * addr)
282 struct rttree_entry *entry;
283 struct rttree_node *node;
284 unsigned int side;
286 for (node = tree->rtt_root; node != NULL;
287 node = node->rtn_child[side]) {
288 if (node->rtn_type == RTNT_DATA &&
289 node->rtn_bits == tree->rtt_bits) {
290 entry = (struct rttree_entry *)node;
292 if (rttree_equals(tree, entry, addr))
293 return entry;
295 break;
298 side = rttree_side(tree, node, addr);
301 return NULL;
305 * Look up a routing entry that is an exact match for the given address and
306 * prefix length. Return the entry if found, or NULL otherwise.
308 struct rttree_entry *
309 rttree_lookup_exact(struct rttree * tree, const void * addr,
310 unsigned int prefix)
312 struct rttree_entry *entry;
313 struct rttree_node *node;
314 unsigned int side;
316 for (node = tree->rtt_root; node != NULL && node->rtn_bits <= prefix;
317 node = node->rtn_child[side]) {
318 if (node->rtn_type == RTNT_DATA) {
319 entry = (struct rttree_entry *)node;
321 if (!rttree_match(tree, entry, addr))
322 return NULL;
324 if (node->rtn_bits == prefix)
325 return entry;
328 side = rttree_side(tree, node, addr);
331 return NULL;
335 * Enumerate entries in the routing tree. If 'last' is NULL, return the first
336 * entry. Otherwise, return the next entry starting from 'last'. In both
337 * cases, if no (more) entries are present in the tree, return NULL. The order
338 * of the returned entries is stable across tree modifications and the function
339 * may be called multiple times on the same entry. More specifically, it is
340 * safe to continue enumeration from a previous entry after deleting its
341 * successor from the tree.
343 struct rttree_entry *
344 rttree_enum(struct rttree * tree, struct rttree_entry * last)
346 struct rttree_node *node, *parent;
349 * For the first query, we may have to return the tree root right away.
350 * For subsequent queries, we have to move ahead by at least one node.
352 if (last == NULL) {
353 if ((node = tree->rtt_root) == NULL)
354 return NULL;
356 if (node->rtn_type == RTNT_DATA)
357 return (struct rttree_entry *)node;
358 } else
359 node = &last->rte_data;
361 /* A basic iterative pre-order binary-tree depth-first search. */
362 do {
363 assert(node != NULL);
365 /* Can we descend further, either left or right? */
366 if (node->rtn_child[0] != NULL)
367 node = node->rtn_child[0];
368 else if (node->rtn_child[1] != NULL)
369 node = node->rtn_child[1];
370 else {
372 * No. Go back up the tree, until we can go right
373 * where we went left before.. or run out of tree.
375 for (;; node = parent) {
376 if ((parent = node->rtn_parent) == NULL)
377 return NULL;
379 if (parent->rtn_child[0] == node &&
380 parent->rtn_child[1] != NULL) {
381 node = parent->rtn_child[1];
383 break;
388 /* Skip link nodes. */
389 } while (node->rtn_type != RTNT_DATA);
391 return (struct rttree_entry *)node;
395 * Set the node 'node' to be part of tree 'tree', with type 'type' (either
396 * RTNT_DATA or RTNT_LINK) and a bit count of 'prefix'. The node is set to be
397 * a child of 'parent' on side 'side', unless 'parent' is NULL in which case
398 * the node is set to be the topmost node in the tree (and 'side' is ignored).
399 * The node's children are set to 'left' and 'right'; for each, if not NULL,
400 * its parent is set to 'node'.
402 static void
403 rttree_set(struct rttree * tree, struct rttree_node * node, int type,
404 unsigned int prefix, struct rttree_node * parent, int side,
405 struct rttree_node * left, struct rttree_node * right)
408 assert(type == RTNT_DATA || type == RTNT_LINK);
409 assert(prefix <= tree->rtt_bits);
410 assert(side == 0 || side == 1);
412 node->rtn_type = type;
413 node->rtn_bits = prefix;
415 /* With rtn_bits assigned, precompute any derived fields. */
416 rttree_precompute(tree, node);
418 if ((node->rtn_parent = parent) != NULL)
419 parent->rtn_child[side] = node;
420 else
421 tree->rtt_root = node;
423 if ((node->rtn_child[0] = left) != NULL)
424 left->rtn_parent = node;
425 if ((node->rtn_child[1] = right) != NULL)
426 right->rtn_parent = node;
430 * In the routing tree 'tree', replace old node 'onode' with new node 'node',
431 * setting the type of the latter to 'type'. The tree is updated accordingly,
432 * but it is left up to the caller to deal with the old node as appropriate.
434 static void
435 rttree_replace(struct rttree * tree, struct rttree_node * onode,
436 struct rttree_node * node, int type)
438 struct rttree_node *parent;
439 unsigned int side;
442 * Replacing one data node with another data node is not something that
443 * is currently being done, even if it would work.
445 assert(onode->rtn_type != RTNT_DATA || node->rtn_type != RTNT_DATA);
446 assert(onode->rtn_child[0] != NULL);
447 assert(onode->rtn_child[1] != NULL);
449 parent = onode->rtn_parent;
451 side = (parent != NULL && parent->rtn_child[1] == onode);
453 rttree_set(tree, node, type, onode->rtn_bits, parent, side,
454 onode->rtn_child[0], onode->rtn_child[1]);
458 * Add a new routing entry 'entry' to the routing tree 'tree'. The entry
459 * object will be initialized as a result. The address to add is given as
460 * 'addr', and the address mask as 'mask'. Both those pointers must be point
461 * to memory that is as long-lived as the routing entry; this is typically
462 * accomplished by storing them in a larger object that embeds 'entry'.
463 * However, 'mask' may be NULL, signifying a host type entry with an implied
464 * full mask. If not NULL, the given mask must be normalized, i.e., it must
465 * consist of a run of zero or more 1-bits followed by a remainder of only
466 * 0-bits. The number of 1-bits must also be given as a bit count 'prefix',
467 * even if 'mask' is NULL. The address must be normalized to its mask: no bits
468 * starting from bit 'prefix' must be set in 'addr'. Return OK if adding the
469 * routing entry succeeded, or EEXIST if an entry already exists for the
470 * combination of that address and mask. If the caller has already verified
471 * with rttree_lookup_exact() that no such entry exists, the call will succeed.
474 rttree_add(struct rttree * tree, struct rttree_entry * entry,
475 const void * addr, const void * mask, unsigned int prefix)
477 struct rttree_node *node, *parent, *link;
478 struct rttree_entry *other_entry;
479 unsigned int bit = 0 /*gcc*/, side, side2;
480 int match;
482 assert(mask != NULL || prefix == tree->rtt_bits);
485 * We start by determining the path, bit count, and method of the
486 * addition. We do this with a lookup on the address, for the full
487 * address width--that is, not limited to the given prefix length. As
488 * a result, at some point we will find either a NULL pointer, or a
489 * data node with a width that is at least as large as the given prefix
490 * length. The NULL case is easy: we EXTEND the tree with our new
491 * entry wherever we ran into the NULL pointer.
493 * If instead we find a sufficiently wide data node, then we see if it
494 * is a match for the new address. If so, our new data node should
495 * either be INSERTed between two nodes along the path taken so far, or
496 * REPLACE a link node along that path with the new data node. If it
497 * it is not a match, then the action to take depends on whether the
498 * first differing bit falls within the given prefix length: if so, we
499 * have to BRANCH along the path, using a link node allocated for that
500 * differing bit; if not, we should use INSERT or REPLACE after all.
502 * As the only exceptional case, we might in fact find an entry for the
503 * exact same address and prefix length as what is being added. In the
504 * current design of the routing tree, this is always a failure case.
506 parent = NULL;
507 side = 0;
508 other_entry = NULL;
510 for (node = tree->rtt_root; node != NULL;
511 node = node->rtn_child[side]) {
512 if (node->rtn_type == RTNT_DATA) {
513 other_entry = (struct rttree_entry *)node;
515 bit = rttree_diff(tree, other_entry->rte_addr, addr);
517 match = (bit >= node->rtn_bits);
519 /* Test whether the exact entry already exists. */
520 if (match && node->rtn_bits == prefix)
521 return EEXIST;
524 * Test the INSERT/REPLACE and BRANCH cases. Note that
525 * this condition is in a terse, optimized form that
526 * does not map directly to the two different cases.
528 if (!match || node->rtn_bits > prefix) {
529 if (bit > prefix)
530 bit = prefix;
531 break;
535 parent = node;
536 side = rttree_side(tree, node, addr);
540 * At this point, addition is going to succeed no matter what. Start
541 * by initializing part of 'entry'. In particular, add the given
542 * entry's link node to the list of free link nodes, because the common
543 * case is that we end up not using it. If we do, we will just take it
544 * off again right away. The entry's data node will be initialized as
545 * part of the addition process below.
547 entry->rte_addr = addr;
548 entry->rte_mask = mask;
550 rttree_add_free(tree, &entry->rte_link);
553 * First deal with the EXTEND case. In that case we already know the
554 * intended parent and the side (left/right) for the addition.
556 if (node == NULL) {
557 assert(parent == NULL || parent->rtn_bits < prefix);
558 assert(parent == NULL || parent->rtn_child[side] == NULL);
560 rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, parent,
561 side, NULL /*left*/, NULL /*right*/);
563 return OK;
567 * For the other three cases, we now have to walk back along the path
568 * we have taken so far in order to find the correct insertion point.
570 while (parent != NULL && parent->rtn_bits >= bit) {
571 node = parent;
573 parent = node->rtn_parent;
576 if (bit == prefix && node->rtn_bits == bit) {
578 * The REPLACE case. Replace the link node 'node' with our new
579 * entry. Afterwards, mark the link node as free.
581 assert(node->rtn_type != RTNT_DATA);
583 rttree_replace(tree, node, &entry->rte_data, RTNT_DATA);
585 rttree_add_free(tree, node);
586 } else if (bit == prefix) {
588 * The INSERT case. Insert the data node between 'parent' and
589 * 'node'. Note that 'parent' may be NULL. We need to use the
590 * address we found earlier, as 'other_entry', to determine
591 * whether we should add 'node' to the left or right of the
592 * inserted data node.
594 assert(node->rtn_bits > bit);
595 assert(parent == NULL || parent->rtn_bits < bit);
596 assert(other_entry != NULL);
598 side = (parent != NULL && parent->rtn_child[1] == node);
600 side2 = rttree_test(tree, other_entry->rte_addr, bit);
602 rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, parent,
603 side, (!side2) ? node : NULL, (side2) ? node : NULL);
604 } else {
606 * The BRANCH case. In this case, it is impossible that we
607 * find a link node with a bit count equal to the first
608 * differing bit between the address we found and the address
609 * we want to insert: if such a node existed, we would have
610 * descended down its other child during the initial lookup.
612 * Interpose a link node between 'parent' and 'current' for bit
613 * 'bit', with its other child set to point to 'entry'. Again,
614 * we need to perform an additional bit test here, because even
615 * though we know that the address we found during the lookup
616 * differs from the given address at bit 'bit', we do not know
617 * the value of either bit yet.
619 assert(bit < prefix);
620 assert(node->rtn_bits > bit);
621 assert(parent == NULL || parent->rtn_bits < bit);
623 link = rttree_get_free(tree);
625 side = (parent != NULL && parent->rtn_child[1] == node);
627 side2 = rttree_test(tree, addr, bit);
629 /* Use NULL for the data node we are about to add. */
630 rttree_set(tree, link, RTNT_LINK, bit, parent, side,
631 (side2) ? node : NULL, (!side2) ? node : NULL);
633 /* This addition will replace the NULL pointer again. */
634 rttree_set(tree, &entry->rte_data, RTNT_DATA, prefix, link,
635 side2, NULL /*left*/, NULL /*right*/);
638 return OK;
642 * Remove a particular node 'node' from the routing tree 'tree'. The given
643 * node must have zero or one children. As integrity check only, if 'nonempty'
644 * is set, the node must have one child. If the node has one child, that child
645 * will be linked to the node's parent (or the tree root), thus cutting the
646 * node itself out of the tree. If the node has zero children, the
647 * corresponding slot in its parent (or the tree root) will be cleared. The
648 * function will return a pointer to the parent node if it too qualifies for
649 * removal afterwards, or NULL if no further removal action needs to be taken.
651 static struct rttree_node *
652 rttree_remove(struct rttree * tree, struct rttree_node * node,
653 int nonempty __unused)
655 struct rttree_node *parent, *child;
656 unsigned int side;
658 if ((child = node->rtn_child[0]) == NULL)
659 child = node->rtn_child[1];
661 assert(child != NULL || !nonempty);
663 if ((parent = node->rtn_parent) != NULL) {
664 side = (parent->rtn_child[1] == node);
666 parent->rtn_child[side] = child;
668 if (child != NULL)
669 child->rtn_parent = parent;
670 else if (parent->rtn_type == RTNT_LINK)
671 return parent;
672 } else {
673 tree->rtt_root = child;
675 if (child != NULL)
676 child->rtn_parent = NULL;
679 return NULL;
683 * Delete the routing entry 'entry' from the routing tree 'tree'. The entry
684 * must have been added before. This function always succeeds.
686 void
687 rttree_delete(struct rttree * tree, struct rttree_entry * entry)
689 struct rttree_node *node, *link;
692 * Remove the data node from the tree. If the data node also has two
693 * children, we have to replace it with a link node. Otherwise, we
694 * have to remove it and, if it has no children at all, possibly remove
695 * its parent as well.
697 node = &entry->rte_data;
699 assert(node->rtn_type == RTNT_DATA);
701 if (node->rtn_child[0] != NULL && node->rtn_child[1] != NULL) {
703 * The link node we allocate here may actually be the entry's
704 * own link node. We do not make an exception for that case
705 * here, as we have to deal with the entry's link node being in
706 * use a bit further down anyway.
708 link = rttree_get_free(tree);
710 rttree_replace(tree, node, link, RTNT_LINK);
711 } else {
713 * Remove the data node from the tree. If the node has no
714 * children, its removal may leave a link node with one child.
715 * That would be its original parent. That node must then also
716 * be removed from the tree, and freed up.
718 link = rttree_remove(tree, node, FALSE /*nonempty*/);
720 if (link != NULL) {
721 (void)rttree_remove(tree, link, TRUE /*nonempty*/);
723 rttree_add_free(tree, link);
728 * Remove the entry's link node from either the tree or the free list,
729 * depending on the type currently assigned to it. If it has to be
730 * removed from the tree, it must be replaced with another link node.
731 * There will always be enough link nodes available for this to work.
733 node = &entry->rte_link;
735 if (node->rtn_type == RTNT_LINK) {
736 link = rttree_get_free(tree);
738 rttree_replace(tree, node, link, RTNT_LINK);
739 } else {
740 assert(node->rtn_type == RTNT_FREE);
742 rttree_del_free(tree, node);