1 /* LWIP service - rttree.c - generic routing tree data structure */
3 * This module implements the Net/3 binary radix (Patricia) tree as described
4 * in TCP/IP Illustrated Vol.2, with a few important changes. First and
5 * foremost, we make the assumption that all address masks are "normal", i.e.,
6 * they can be expressed in terms of a "prefix length" or "bit count", meaning
7 * that the first so many bits of the mask are set and the remaining bits are
8 * all clear. Based on this assumption, we store routing entries not just in
9 * leaf nodes, but rather in a node at the bit count of the routing entry's
10 * mask; this node may then also have children. As a result, instead of "leaf"
11 * and "internal" nodes, this module instead uses "data" and "link" nodes:
13 * - Data nodes are nodes with an associated routing entry. The data node
14 * structure is always the first field of its corresponding routing entry
15 * structure. Data nodes may have zero, one, or two children. Its children
16 * are always a refinement of the address mask in the routing entry.
17 * - Link nodes are nodes with no associated routing entry. They always have
18 * exactly two children. As with BSD's "internal" nodes: since the tree
19 * needs no more than one link node per routing entry, each routing entry
20 * structure contains a link node, which may be used anywhere in the tree.
22 * The result of this approach is that we do not use a linked list for each
23 * leaf, since entries with the same address and different masks are not stored
24 * as part of the same leaf node. There is however still one case where a
25 * linked list would be necessary: the coexistence of a full-mask network entry
26 * and a host entry (net/32 vs host for IPv4, net/128 vs host for IPv6). Since
27 * this tree implementation is not used for ARP/ND6 (host) entries, the need to
28 * support that case is not as high, and so it is currently not supported. It
29 * can be added later if needed. In that case, the prototype of only
30 * rttree_find_exact() will have to be changed, since rttree_add() already
31 * supports the difference by passing a full mask vs passing no mask at all.
33 * There are other differences with the BSD implementation, and certainly also
34 * more opportunities for improving performance. For now, the implementation
35 * should be good enough for its intended purpose.
41 #define RTTREE_BITS_TO_BYTE(bits) ((bits) >> 3)
42 #define RTTREE_BITS_TO_SHIFT(bits) (7 - ((bits) & 7))
43 #define RTTREE_BITS_TO_BYTES(bits) (RTTREE_BITS_TO_BYTE((bits) + 7))
46 * The given node is being added to the given routing tree, and just had its
47 * bit count assigned. Precompute any additional fields used for fast address
51 rttree_precompute(struct rttree
* tree __unused
, struct rttree_node
* node
)
54 node
->rtn_byte
= RTTREE_BITS_TO_BYTE(node
->rtn_bits
);
55 node
->rtn_shift
= RTTREE_BITS_TO_SHIFT(node
->rtn_bits
);
59 * For an operation on the routing tree 'tree', test whether the bit 'bit' is
60 * set or clear in 'addr'. Return 1 if the address has the bit set, 0 if it
64 rttree_test(const struct rttree
* tree __unused
, const void * addr
,
67 unsigned int byte
, shift
;
69 byte
= RTTREE_BITS_TO_BYTE(bit
);
70 shift
= RTTREE_BITS_TO_SHIFT(bit
);
72 return (((const uint8_t *)addr
)[byte
] >> shift
) & 1;
76 * For an operation on the routing tree 'tree', test whether a particular bit
77 * as identified by the routing node 'node' is set or clear in 'address',
78 * effectively computing the side (left or right) to take when descending down
79 * the tree. Return 1 if the address has the bit set, 0 if it does not.
81 static inline unsigned int
82 rttree_side(const struct rttree
* tree
, const struct rttree_node
* node
,
86 return (((const uint8_t *)addr
)[node
->rtn_byte
] >>
91 * Check for the routing tree 'tree' whether the routing entry 'entry' matches
92 * the address 'addr' exactly. Return TRUE or FALSE depending on the outcome.
93 * This function must be called only on entries that have already been
94 * determined to span the full bit width.
97 rttree_equals(const struct rttree
* tree
, const struct rttree_entry
* entry
,
102 bits
= tree
->rtt_bits
;
104 assert(bits
== entry
->rte_data
.rtn_bits
);
106 return !memcmp(entry
->rte_addr
, addr
, RTTREE_BITS_TO_BYTE(bits
));
110 * Check for the routing tree 'tree' whether the routing entry 'entry' matches
111 * the address 'addr'. Return TRUE if the address is matched by the entry's
112 * address and mask, or FALSE if not.
115 rttree_match(const struct rttree
* tree
, const struct rttree_entry
* entry
,
118 const uint8_t *aptr
, *aptr2
, *mptr
;
119 unsigned int bits
, bytes
;
121 if ((bits
= entry
->rte_data
.rtn_bits
) == 0)
124 if ((mptr
= (const uint8_t *)entry
->rte_mask
) == NULL
)
125 return rttree_equals(tree
, entry
, addr
);
127 aptr
= (const uint8_t *)addr
;
128 aptr2
= (const uint8_t *)entry
->rte_addr
;
130 for (bytes
= RTTREE_BITS_TO_BYTES(bits
); bytes
> 0; bytes
--) {
131 if ((*aptr
& *mptr
) != *aptr2
)
143 * Find the first bit that differs between the two given addresses. Return the
144 * bit number if found, or the full bit width if the addresses are equal.
147 rttree_diff(const struct rttree
* tree
, const void * addr
, const void * addr2
)
149 const uint8_t *aptr
, *aptr2
;
153 aptr
= (const uint8_t *)addr
;
154 aptr2
= (const uint8_t *)addr2
;
156 for (bit
= 0; bit
< tree
->rtt_bits
; bit
+= NBBY
, aptr
++, aptr2
++) {
157 if ((b
= *aptr
^ *aptr2
) != 0) {
158 for (i
= 0; i
< NBBY
; i
++)
159 if (b
& (1 << (NBBY
- i
- 1)))
169 * Add a link node to the free list of the given routing tree, marking it as
170 * free in the process.
173 rttree_add_free(struct rttree
* tree
, struct rttree_node
* node
)
176 node
->rtn_child
[0] = NULL
;
177 if ((node
->rtn_child
[1] = tree
->rtt_free
) != NULL
)
178 node
->rtn_child
[1]->rtn_child
[0] = node
;
179 tree
->rtt_free
= node
;
180 node
->rtn_parent
= NULL
;
181 node
->rtn_type
= RTNT_FREE
;
185 * Remove the given free link node from the free list. The caller must already
186 * have verified that the node is on the free list, and has to change the node
187 * type as appropriate afterward.
190 rttree_del_free(struct rttree
* tree
, struct rttree_node
* node
)
193 assert(node
->rtn_type
== RTNT_FREE
);
195 if (node
->rtn_child
[0] != NULL
)
196 node
->rtn_child
[0]->rtn_child
[1] = node
->rtn_child
[1];
198 tree
->rtt_free
= node
->rtn_child
[1];
199 if (node
->rtn_child
[1] != NULL
)
200 node
->rtn_child
[1]->rtn_child
[0] = node
->rtn_child
[0];
204 * Obtain, remove, and return a free link node from the free list. This
205 * function must be called only when it is already known that the free list is
206 * not empty. The caller has to change the node type as appropriate afterward.
208 static struct rttree_node
*
209 rttree_get_free(struct rttree
* tree
)
211 struct rttree_node
* node
;
213 node
= tree
->rtt_free
;
214 assert(node
!= NULL
);
215 assert(node
->rtn_type
== RTNT_FREE
);
217 rttree_del_free(tree
, node
);
223 * Initialize the given routing tree, with the given address bit width.
226 rttree_init(struct rttree
* tree
, unsigned int bits
)
229 tree
->rtt_root
= NULL
;
230 tree
->rtt_free
= NULL
;
231 tree
->rtt_bits
= bits
;
235 * Look up the most narrow routing tree entry that matches the given address.
236 * Return the entry on success, or NULL if no matching entry is found.
238 struct rttree_entry
*
239 rttree_lookup_match(struct rttree
* tree
, const void * addr
)
241 struct rttree_entry
*entry
, *best
;
242 struct rttree_node
*node
;
246 * The current implementation is "forward-tracking", testing all
247 * potentially matching entries while descending into the tree and
248 * remembering the "best" (narrowest matching) entry. The assumption
249 * here is that most lookups will end up returning the default route or
250 * another broad route, and thus quickly fail a narrower match and bail
251 * out early. This assumption is in part motivated by the fact that
252 * our routing trees do not store link-layer (ARP/ND6) entries. If
253 * desired, the implementation can easily be rewritten to do
254 * backtracking instead.
258 for (node
= tree
->rtt_root
; node
!= NULL
;
259 node
= node
->rtn_child
[side
]) {
260 if (node
->rtn_type
== RTNT_DATA
) {
261 entry
= (struct rttree_entry
*)node
;
263 if (!rttree_match(tree
, entry
, addr
))
269 side
= rttree_side(tree
, node
, addr
);
276 * Look up a routing entry that is an exact match for the given (full) address.
277 * Return the entry if it was found, or NULL otherwise.
279 struct rttree_entry
*
280 rttree_lookup_host(struct rttree
* tree
, const void * addr
)
282 struct rttree_entry
*entry
;
283 struct rttree_node
*node
;
286 for (node
= tree
->rtt_root
; node
!= NULL
;
287 node
= node
->rtn_child
[side
]) {
288 if (node
->rtn_type
== RTNT_DATA
&&
289 node
->rtn_bits
== tree
->rtt_bits
) {
290 entry
= (struct rttree_entry
*)node
;
292 if (rttree_equals(tree
, entry
, addr
))
298 side
= rttree_side(tree
, node
, addr
);
305 * Look up a routing entry that is an exact match for the given address and
306 * prefix length. Return the entry if found, or NULL otherwise.
308 struct rttree_entry
*
309 rttree_lookup_exact(struct rttree
* tree
, const void * addr
,
312 struct rttree_entry
*entry
;
313 struct rttree_node
*node
;
316 for (node
= tree
->rtt_root
; node
!= NULL
&& node
->rtn_bits
<= prefix
;
317 node
= node
->rtn_child
[side
]) {
318 if (node
->rtn_type
== RTNT_DATA
) {
319 entry
= (struct rttree_entry
*)node
;
321 if (!rttree_match(tree
, entry
, addr
))
324 if (node
->rtn_bits
== prefix
)
328 side
= rttree_side(tree
, node
, addr
);
335 * Enumerate entries in the routing tree. If 'last' is NULL, return the first
336 * entry. Otherwise, return the next entry starting from 'last'. In both
337 * cases, if no (more) entries are present in the tree, return NULL. The order
338 * of the returned entries is stable across tree modifications and the function
339 * may be called multiple times on the same entry. More specifically, it is
340 * safe to continue enumeration from a previous entry after deleting its
341 * successor from the tree.
343 struct rttree_entry
*
344 rttree_enum(struct rttree
* tree
, struct rttree_entry
* last
)
346 struct rttree_node
*node
, *parent
;
349 * For the first query, we may have to return the tree root right away.
350 * For subsequent queries, we have to move ahead by at least one node.
353 if ((node
= tree
->rtt_root
) == NULL
)
356 if (node
->rtn_type
== RTNT_DATA
)
357 return (struct rttree_entry
*)node
;
359 node
= &last
->rte_data
;
361 /* A basic iterative pre-order binary-tree depth-first search. */
363 assert(node
!= NULL
);
365 /* Can we descend further, either left or right? */
366 if (node
->rtn_child
[0] != NULL
)
367 node
= node
->rtn_child
[0];
368 else if (node
->rtn_child
[1] != NULL
)
369 node
= node
->rtn_child
[1];
372 * No. Go back up the tree, until we can go right
373 * where we went left before.. or run out of tree.
375 for (;; node
= parent
) {
376 if ((parent
= node
->rtn_parent
) == NULL
)
379 if (parent
->rtn_child
[0] == node
&&
380 parent
->rtn_child
[1] != NULL
) {
381 node
= parent
->rtn_child
[1];
388 /* Skip link nodes. */
389 } while (node
->rtn_type
!= RTNT_DATA
);
391 return (struct rttree_entry
*)node
;
395 * Set the node 'node' to be part of tree 'tree', with type 'type' (either
396 * RTNT_DATA or RTNT_LINK) and a bit count of 'prefix'. The node is set to be
397 * a child of 'parent' on side 'side', unless 'parent' is NULL in which case
398 * the node is set to be the topmost node in the tree (and 'side' is ignored).
399 * The node's children are set to 'left' and 'right'; for each, if not NULL,
400 * its parent is set to 'node'.
403 rttree_set(struct rttree
* tree
, struct rttree_node
* node
, int type
,
404 unsigned int prefix
, struct rttree_node
* parent
, int side
,
405 struct rttree_node
* left
, struct rttree_node
* right
)
408 assert(type
== RTNT_DATA
|| type
== RTNT_LINK
);
409 assert(prefix
<= tree
->rtt_bits
);
410 assert(side
== 0 || side
== 1);
412 node
->rtn_type
= type
;
413 node
->rtn_bits
= prefix
;
415 /* With rtn_bits assigned, precompute any derived fields. */
416 rttree_precompute(tree
, node
);
418 if ((node
->rtn_parent
= parent
) != NULL
)
419 parent
->rtn_child
[side
] = node
;
421 tree
->rtt_root
= node
;
423 if ((node
->rtn_child
[0] = left
) != NULL
)
424 left
->rtn_parent
= node
;
425 if ((node
->rtn_child
[1] = right
) != NULL
)
426 right
->rtn_parent
= node
;
430 * In the routing tree 'tree', replace old node 'onode' with new node 'node',
431 * setting the type of the latter to 'type'. The tree is updated accordingly,
432 * but it is left up to the caller to deal with the old node as appropriate.
435 rttree_replace(struct rttree
* tree
, struct rttree_node
* onode
,
436 struct rttree_node
* node
, int type
)
438 struct rttree_node
*parent
;
442 * Replacing one data node with another data node is not something that
443 * is currently being done, even if it would work.
445 assert(onode
->rtn_type
!= RTNT_DATA
|| node
->rtn_type
!= RTNT_DATA
);
446 assert(onode
->rtn_child
[0] != NULL
);
447 assert(onode
->rtn_child
[1] != NULL
);
449 parent
= onode
->rtn_parent
;
451 side
= (parent
!= NULL
&& parent
->rtn_child
[1] == onode
);
453 rttree_set(tree
, node
, type
, onode
->rtn_bits
, parent
, side
,
454 onode
->rtn_child
[0], onode
->rtn_child
[1]);
458 * Add a new routing entry 'entry' to the routing tree 'tree'. The entry
459 * object will be initialized as a result. The address to add is given as
460 * 'addr', and the address mask as 'mask'. Both those pointers must be point
461 * to memory that is as long-lived as the routing entry; this is typically
462 * accomplished by storing them in a larger object that embeds 'entry'.
463 * However, 'mask' may be NULL, signifying a host type entry with an implied
464 * full mask. If not NULL, the given mask must be normalized, i.e., it must
465 * consist of a run of zero or more 1-bits followed by a remainder of only
466 * 0-bits. The number of 1-bits must also be given as a bit count 'prefix',
467 * even if 'mask' is NULL. The address must be normalized to its mask: no bits
468 * starting from bit 'prefix' must be set in 'addr'. Return OK if adding the
469 * routing entry succeeded, or EEXIST if an entry already exists for the
470 * combination of that address and mask. If the caller has already verified
471 * with rttree_lookup_exact() that no such entry exists, the call will succeed.
474 rttree_add(struct rttree
* tree
, struct rttree_entry
* entry
,
475 const void * addr
, const void * mask
, unsigned int prefix
)
477 struct rttree_node
*node
, *parent
, *link
;
478 struct rttree_entry
*other_entry
;
479 unsigned int bit
= 0 /*gcc*/, side
, side2
;
482 assert(mask
!= NULL
|| prefix
== tree
->rtt_bits
);
485 * We start by determining the path, bit count, and method of the
486 * addition. We do this with a lookup on the address, for the full
487 * address width--that is, not limited to the given prefix length. As
488 * a result, at some point we will find either a NULL pointer, or a
489 * data node with a width that is at least as large as the given prefix
490 * length. The NULL case is easy: we EXTEND the tree with our new
491 * entry wherever we ran into the NULL pointer.
493 * If instead we find a sufficiently wide data node, then we see if it
494 * is a match for the new address. If so, our new data node should
495 * either be INSERTed between two nodes along the path taken so far, or
496 * REPLACE a link node along that path with the new data node. If it
497 * it is not a match, then the action to take depends on whether the
498 * first differing bit falls within the given prefix length: if so, we
499 * have to BRANCH along the path, using a link node allocated for that
500 * differing bit; if not, we should use INSERT or REPLACE after all.
502 * As the only exceptional case, we might in fact find an entry for the
503 * exact same address and prefix length as what is being added. In the
504 * current design of the routing tree, this is always a failure case.
510 for (node
= tree
->rtt_root
; node
!= NULL
;
511 node
= node
->rtn_child
[side
]) {
512 if (node
->rtn_type
== RTNT_DATA
) {
513 other_entry
= (struct rttree_entry
*)node
;
515 bit
= rttree_diff(tree
, other_entry
->rte_addr
, addr
);
517 match
= (bit
>= node
->rtn_bits
);
519 /* Test whether the exact entry already exists. */
520 if (match
&& node
->rtn_bits
== prefix
)
524 * Test the INSERT/REPLACE and BRANCH cases. Note that
525 * this condition is in a terse, optimized form that
526 * does not map directly to the two different cases.
528 if (!match
|| node
->rtn_bits
> prefix
) {
536 side
= rttree_side(tree
, node
, addr
);
540 * At this point, addition is going to succeed no matter what. Start
541 * by initializing part of 'entry'. In particular, add the given
542 * entry's link node to the list of free link nodes, because the common
543 * case is that we end up not using it. If we do, we will just take it
544 * off again right away. The entry's data node will be initialized as
545 * part of the addition process below.
547 entry
->rte_addr
= addr
;
548 entry
->rte_mask
= mask
;
550 rttree_add_free(tree
, &entry
->rte_link
);
553 * First deal with the EXTEND case. In that case we already know the
554 * intended parent and the side (left/right) for the addition.
557 assert(parent
== NULL
|| parent
->rtn_bits
< prefix
);
558 assert(parent
== NULL
|| parent
->rtn_child
[side
] == NULL
);
560 rttree_set(tree
, &entry
->rte_data
, RTNT_DATA
, prefix
, parent
,
561 side
, NULL
/*left*/, NULL
/*right*/);
567 * For the other three cases, we now have to walk back along the path
568 * we have taken so far in order to find the correct insertion point.
570 while (parent
!= NULL
&& parent
->rtn_bits
>= bit
) {
573 parent
= node
->rtn_parent
;
576 if (bit
== prefix
&& node
->rtn_bits
== bit
) {
578 * The REPLACE case. Replace the link node 'node' with our new
579 * entry. Afterwards, mark the link node as free.
581 assert(node
->rtn_type
!= RTNT_DATA
);
583 rttree_replace(tree
, node
, &entry
->rte_data
, RTNT_DATA
);
585 rttree_add_free(tree
, node
);
586 } else if (bit
== prefix
) {
588 * The INSERT case. Insert the data node between 'parent' and
589 * 'node'. Note that 'parent' may be NULL. We need to use the
590 * address we found earlier, as 'other_entry', to determine
591 * whether we should add 'node' to the left or right of the
592 * inserted data node.
594 assert(node
->rtn_bits
> bit
);
595 assert(parent
== NULL
|| parent
->rtn_bits
< bit
);
596 assert(other_entry
!= NULL
);
598 side
= (parent
!= NULL
&& parent
->rtn_child
[1] == node
);
600 side2
= rttree_test(tree
, other_entry
->rte_addr
, bit
);
602 rttree_set(tree
, &entry
->rte_data
, RTNT_DATA
, prefix
, parent
,
603 side
, (!side2
) ? node
: NULL
, (side2
) ? node
: NULL
);
606 * The BRANCH case. In this case, it is impossible that we
607 * find a link node with a bit count equal to the first
608 * differing bit between the address we found and the address
609 * we want to insert: if such a node existed, we would have
610 * descended down its other child during the initial lookup.
612 * Interpose a link node between 'parent' and 'current' for bit
613 * 'bit', with its other child set to point to 'entry'. Again,
614 * we need to perform an additional bit test here, because even
615 * though we know that the address we found during the lookup
616 * differs from the given address at bit 'bit', we do not know
617 * the value of either bit yet.
619 assert(bit
< prefix
);
620 assert(node
->rtn_bits
> bit
);
621 assert(parent
== NULL
|| parent
->rtn_bits
< bit
);
623 link
= rttree_get_free(tree
);
625 side
= (parent
!= NULL
&& parent
->rtn_child
[1] == node
);
627 side2
= rttree_test(tree
, addr
, bit
);
629 /* Use NULL for the data node we are about to add. */
630 rttree_set(tree
, link
, RTNT_LINK
, bit
, parent
, side
,
631 (side2
) ? node
: NULL
, (!side2
) ? node
: NULL
);
633 /* This addition will replace the NULL pointer again. */
634 rttree_set(tree
, &entry
->rte_data
, RTNT_DATA
, prefix
, link
,
635 side2
, NULL
/*left*/, NULL
/*right*/);
642 * Remove a particular node 'node' from the routing tree 'tree'. The given
643 * node must have zero or one children. As integrity check only, if 'nonempty'
644 * is set, the node must have one child. If the node has one child, that child
645 * will be linked to the node's parent (or the tree root), thus cutting the
646 * node itself out of the tree. If the node has zero children, the
647 * corresponding slot in its parent (or the tree root) will be cleared. The
648 * function will return a pointer to the parent node if it too qualifies for
649 * removal afterwards, or NULL if no further removal action needs to be taken.
651 static struct rttree_node
*
652 rttree_remove(struct rttree
* tree
, struct rttree_node
* node
,
653 int nonempty __unused
)
655 struct rttree_node
*parent
, *child
;
658 if ((child
= node
->rtn_child
[0]) == NULL
)
659 child
= node
->rtn_child
[1];
661 assert(child
!= NULL
|| !nonempty
);
663 if ((parent
= node
->rtn_parent
) != NULL
) {
664 side
= (parent
->rtn_child
[1] == node
);
666 parent
->rtn_child
[side
] = child
;
669 child
->rtn_parent
= parent
;
670 else if (parent
->rtn_type
== RTNT_LINK
)
673 tree
->rtt_root
= child
;
676 child
->rtn_parent
= NULL
;
683 * Delete the routing entry 'entry' from the routing tree 'tree'. The entry
684 * must have been added before. This function always succeeds.
687 rttree_delete(struct rttree
* tree
, struct rttree_entry
* entry
)
689 struct rttree_node
*node
, *link
;
692 * Remove the data node from the tree. If the data node also has two
693 * children, we have to replace it with a link node. Otherwise, we
694 * have to remove it and, if it has no children at all, possibly remove
695 * its parent as well.
697 node
= &entry
->rte_data
;
699 assert(node
->rtn_type
== RTNT_DATA
);
701 if (node
->rtn_child
[0] != NULL
&& node
->rtn_child
[1] != NULL
) {
703 * The link node we allocate here may actually be the entry's
704 * own link node. We do not make an exception for that case
705 * here, as we have to deal with the entry's link node being in
706 * use a bit further down anyway.
708 link
= rttree_get_free(tree
);
710 rttree_replace(tree
, node
, link
, RTNT_LINK
);
713 * Remove the data node from the tree. If the node has no
714 * children, its removal may leave a link node with one child.
715 * That would be its original parent. That node must then also
716 * be removed from the tree, and freed up.
718 link
= rttree_remove(tree
, node
, FALSE
/*nonempty*/);
721 (void)rttree_remove(tree
, link
, TRUE
/*nonempty*/);
723 rttree_add_free(tree
, link
);
728 * Remove the entry's link node from either the tree or the free list,
729 * depending on the type currently assigned to it. If it has to be
730 * removed from the tree, it must be replaced with another link node.
731 * There will always be enough link nodes available for this to work.
733 node
= &entry
->rte_link
;
735 if (node
->rtn_type
== RTNT_LINK
) {
736 link
= rttree_get_free(tree
);
738 rttree_replace(tree
, node
, link
, RTNT_LINK
);
740 assert(node
->rtn_type
== RTNT_FREE
);
742 rttree_del_free(tree
, node
);