etc/services - sync with NetBSD-8
[minix.git] / minix / net / lwip / route.c
blob11a77fb330dab4d3e504ce881166aea91e502957
1 /* LWIP service - route.c - route management */
2 /*
3 * This module provides a destination-based routing implementation, roughly
4 * matching the routing as done traditionally by the BSDs and by current NetBSD
5 * in particular. As such, this implementation almost completely replaces
6 * lwIP's own more limited (and less rigid) routing algorithms. It does this
7 * using a combination of overriding lwIP functions (ip4_route, ip6_route) with
8 * weak-symbol patching, and lwIP-provided gateway hooks. Especially the
9 * former gives us a level of control that lwIP's routing hooks do not provide:
10 * not only does such overriding give us the ability to flag that no route was
11 * found at all, we also bypass a number of default decisions taken by lwIP
12 * where the routing hooks are not called at all.
14 * As a result, the routing tables as visible to the user are an almost
15 * completely accurate reflection of the routing decisions taken by this TCP/IP
16 * stack in practice. There is currently only one exception: for IPv4 gateway
17 * selection, lwIP will bypass the gateway hook if the given address is on the
18 * local subnet according to the locally assigned IP address and subnet mask.
19 * This exception should practically affect no one, though.
21 * Our routing implementation differs from NetBSD's in various aspects, though.
22 * Perhaps the most important one, also noted elsewhere, is that we do not
23 * support the coexistence of an all-bits-set network route and a host route
24 * for the same IP address. If necessary, this issue can be resolved.
26 * We use a custom concept of "immutable" routes for local addresses, which are
27 * a somewhat special case as explained in the ifaddr module. Since those
28 * RTF_LOCAL routes cannot be deleted, a small change is made to the route(8)
29 * flush-all command to skip them. Packets directed at local addresses on
30 * non-loopback interfaces are handled in a way that differs from NetBSD's,
31 * too. This is explained in the ifdev module.
33 * The BSDs support special routes that reject or blackhole packets, based on
34 * routing flags. We support such routes as well, but implement them somewhat
35 * differently from the BSDs: such packets always get routed over a loopback
36 * interface (regardless of their associated interface), in order to save on
37 * routing lookups for packets in the common case.
39 * As general rules of thumb: if there is no route to a destination, assignment
40 * of a local address will already fail with a "no route to host" error. If
41 * there is an RTF_REJECT route, a local address will be assigned, but actual
42 * packets will be routed to a loopback interface and result in a "no route to
43 * host" error upon reception there - this is what NetBSD seems to do too, even
44 * though the documentation says that RTF_REJECT routes generate ICMP messages
45 * instead. RTF_BLACKHOLE behaves similarly to RTF_REJECT, except that the
46 * packet is simply discarded upon receipt by the loopback interface.
48 * In various places, both here and elsewhere, we check to make sure that on
49 * routing and output, scoped IPv6 source and destination addresses never leave
50 * their zone. For example, a packet must not be sent to an outgoing interface
51 * if its source address is a link-local address with a zone for another
52 * interface. lwIP does not check for such violations, and so we must make
53 * sure that this does not happen ourselves.
55 * Normally, one would tell lwIP to use a particular default IPv4 gateway by
56 * associating the gateway address to a particular interface, and then setting
57 * that interface as default interface (netif_default). We explicitly do
58 * neither of these things. Instead, the routing hooks should return the
59 * default route whenever applicable, and the gateway hooks should return the
60 * default route's gateway IP address whenever needed.
62 * Due to lwIP's limited set of error codes, we do not properly distinguish
63 * between cases where EHOSTUNREACH or ENETUNREACH should be thrown, and throw
64 * the former in most cases.
67 #include "lwip.h"
68 #include "ifaddr.h"
69 #include "rttree.h"
70 #include "rtsock.h"
71 #include "route.h"
72 #include "lldata.h"
74 #include "lwip/nd6.h"
/*
 * Size, in uint8_t bytes, of the largest raw routing address we have to
 * store: the larger of 4 bytes (IPv4) and 16 bytes (IPv6).
 */
#define ROUTE_ADDR_MAX	(MAX(IP4_BITS, IP6_BITS) / NBBY)
83 * We use a shared routing entry data structure for IPv4 and IPv6 routing
84 * entries. The result is cleaner code at the cost of (currently) about 2.3KB
85 * of memory wasted (costing 12 bytes per address for three addresses for 64 of
86 * the 128 routing entries that would be for IPv4), although with the benefit
87 * that either address family may use more than half of the routing entries.
88 * From that 2.3KB, 1KB can be reclaimed by moving the destination address and
89 * mask into the rttree_entry data structure, at the cost of its generality.
91 struct route_entry {
92 struct rttree_entry re_entry; /* routing tree entry */
93 union pxfer_re_pu {
94 struct ifdev *repu_ifdev; /* associated interface */
95 SIMPLEQ_ENTRY(route_entry) repu_next; /* next free pointer */
96 } re_pu;
97 unsigned int re_flags; /* routing flags (RTF_) */
98 unsigned int re_use; /* number of times used */
99 uint8_t re_addr[ROUTE_ADDR_MAX]; /* destination address */
100 uint8_t re_mask[ROUTE_ADDR_MAX]; /* destination mask */
101 union ixfer_re_gu {
102 ip4_addr_p_t regu_gw4; /* gateway (IPv4) */
103 ip6_addr_p_t regu_gw6; /* gateway (IPv6) */
104 } re_gu;
106 #define re_ifdev re_pu.repu_ifdev
107 #define re_next re_pu.repu_next
108 #define re_gw4 re_gu.regu_gw4
109 #define re_gw6 re_gu.regu_gw6
/*
 * Routes for local addresses (RTF_LOCAL) are immutable, for reasons explained
 * in the ifdev module.  The macro yields a nonzero value for such routes.
 */
#define route_is_immutable(route)	((route)->re_flags & RTF_LOCAL)

/*
 * A subset of the BSD routing flags is overridden to hold settings of our
 * own.  In particular, we need to record whether a route is for an IPv4 or an
 * IPv6 destination.  BSD's RTF_DONE flag is reused for that purpose: RTF_DONE
 * is only used with routing sockets and never associated with actual routes,
 * whereas RTF_IPV6 is only used with actual routes and never sent across
 * routing sockets.  In general, overriding flags is preferable to adding new
 * ones: BSD may add more flags of its own later, but it can never remove
 * existing ones.
 */
#define RTF_IPV6	RTF_DONE	/* route is for an IPv6 destination */
126 /* The total number of routing entries (IPv4 and IPv6 combined). */
127 #define NR_ROUTE_ENTRY 128
129 static struct route_entry route_array[NR_ROUTE_ENTRY]; /* routing entries */
131 static SIMPLEQ_HEAD(, route_entry) route_freelist; /* free entry list */
133 /* The routing trees. There are two: one for IPv4 and one for IPv6. */
134 #define ROUTE_TREE_V4 0
135 #define ROUTE_TREE_V6 1
136 #define NR_ROUTE_TREE 2
138 static struct rttree route_tree[NR_ROUTE_TREE];
140 /* We support a single cached routing entry per address family (IPv4, IPv6). */
141 static int rtcache_v4set;
142 static ip4_addr_t rtcache_v4addr;
143 static struct route_entry *rtcache_v4route;
145 static int rtcache_v6set;
146 static ip6_addr_t rtcache_v6addr;
147 static struct route_entry *rtcache_v6route;
150 * Initialize the routing cache. There are a lot of trivial functions here,
151 * but this is designed to be extended in the future.
153 static void
154 rtcache_init(void)
157 rtcache_v4set = FALSE;
158 rtcache_v6set = FALSE;
162 * Look up the given IPv4 address in the routing cache. If there is a match,
163 * return TRUE with the associated route in 'route', possibly NULL if a
164 * negative result was cached. Return FALSE if the routing cache does not
165 * cache the given IPv4 address.
167 static inline int
168 rtcache_lookup_v4(const ip4_addr_t * ipaddr, struct route_entry ** route)
171 if (rtcache_v4set && ip4_addr_cmp(&rtcache_v4addr, ipaddr)) {
172 *route = rtcache_v4route;
174 return TRUE;
175 } else
176 return FALSE;
180 * Add the given IPv4 address and the given routing entry (NULL for negative
181 * caching) to the routing cache.
183 static inline void
184 rtcache_add_v4(const ip4_addr_t * ipaddr, struct route_entry * route)
187 rtcache_v4addr = *ipaddr;
188 rtcache_v4route = route;
189 rtcache_v4set = TRUE;
193 * Reset the IPv4 routing cache.
195 static void
196 rtcache_reset_v4(void)
199 rtcache_v4set = FALSE;
203 * Look up the given IPv6 address in the routing cache. If there is a match,
204 * return TRUE with the associated route in 'route', possibly NULL if a
205 * negative result was cached. Return FALSE if the routing cache does not
206 * cache the given IPv6 address.
208 static inline int
209 rtcache_lookup_v6(const ip6_addr_t * ipaddr, struct route_entry ** route)
212 if (rtcache_v6set && ip6_addr_cmp(&rtcache_v6addr, ipaddr)) {
213 *route = rtcache_v6route;
215 return TRUE;
216 } else
217 return FALSE;
221 * Add the given IPv6 address and the given routing entry (NULL for negative
222 * caching) to the routing cache. Caching of scoped addresses without zones is
223 * not supported.
225 static inline void
226 rtcache_add_v6(const ip6_addr_t * ipaddr, struct route_entry * route)
229 rtcache_v6addr = *ipaddr;
230 rtcache_v6route = route;
231 rtcache_v6set = TRUE;
235 * Reset the IPv6 routing cache.
237 static void
238 rtcache_reset_v6(void)
241 rtcache_v6set = FALSE;
245 * Initialize the routing module.
247 void
248 route_init(void)
250 unsigned int slot;
252 /* Initialize the routing trees. */
253 rttree_init(&route_tree[ROUTE_TREE_V4], IP4_BITS);
254 rttree_init(&route_tree[ROUTE_TREE_V6], IP6_BITS);
256 /* Initialize the list of free routing entries. */
257 SIMPLEQ_INIT(&route_freelist);
259 for (slot = 0; slot < __arraycount(route_array); slot++)
260 SIMPLEQ_INSERT_TAIL(&route_freelist, &route_array[slot],
261 re_next);
263 /* Reset the routing cache. */
264 rtcache_init();
268 * Prepare for a routing tree operation by converting the given IPv4 address
269 * into a raw address that can be used in that routing tree operation.
271 static inline void
272 route_prepare_v4(const ip4_addr_t * ip4addr, uint8_t rtaddr[ROUTE_ADDR_MAX])
274 uint32_t val;
276 val = ip4_addr_get_u32(ip4addr);
278 memcpy(rtaddr, &val, sizeof(val));
282 * Prepare for a routing tree operation by converting the given IPv6 address
283 * into a raw address that can be used in that routing tree operation. If the
284 * given prefix length allows for it, also incorporate the address zone.
286 static inline void
287 route_prepare_v6(const ip6_addr_t * ip6addr, unsigned int prefix,
288 uint8_t rtaddr[ROUTE_ADDR_MAX])
291 assert(sizeof(ip6addr->addr) == IP6_BITS / NBBY);
294 * TODO: in most cases, we could actually return a pointer to the
295 * address contained in the given lwIP IP address structure. However,
296 * doing so would make a lot things quite a bit messier around here,
297 * but the small performance gain may still make it worth it.
299 memcpy(rtaddr, ip6addr->addr, sizeof(ip6addr->addr));
302 * Embed the zone ID into the address, KAME style. This is the
303 * easiest way to have link-local addresses for multiple interfaces
304 * coexist in a single routing tree. Do this only if the full zone ID
305 * would be included in the prefix though, or we might de-normalize the
306 * address.
308 if (ip6_addr_has_zone(ip6addr) && prefix >= 32)
309 rtaddr[3] = ip6_addr_zone(ip6addr);
313 * Prepare for a routing tree operation by converting the given IP address into
314 * a raw address that can be used in that routing tree operation. The given
315 * address's zone ID is embedded "KAME-style" into the raw (IPv6) address when
316 * applicable and if the given prefix length allows for it. Return the index
317 * of the routing tree to use (ROUTE_TREE_V4 or ROUTE_TREE_V6).
319 static unsigned int
320 route_prepare(const ip_addr_t * ipaddr, unsigned int prefix,
321 uint8_t rtaddr[ROUTE_ADDR_MAX])
324 switch (IP_GET_TYPE(ipaddr)) {
325 case IPADDR_TYPE_V4:
326 route_prepare_v4(ip_2_ip4(ipaddr), rtaddr);
328 return ROUTE_TREE_V4;
330 case IPADDR_TYPE_V6:
331 route_prepare_v6(ip_2_ip6(ipaddr), prefix, rtaddr);
333 return ROUTE_TREE_V6;
335 default:
336 panic("unknown IP address type: %u", IP_GET_TYPE(ipaddr));
341 * The given routing tree (ROUTE_TREE_V4 or ROUTE_TREE_V6) has been updated.
342 * Invalidate any cache entries that may now have become stale, both locally
343 * and in lwIP.
345 static void
346 route_updated(unsigned int tree)
349 if (tree == ROUTE_TREE_V6) {
350 rtcache_reset_v6();
353 * Also clear the lwIP ND6 destination cache, which may now
354 * contain entries for the wrong gateway.
356 nd6_clear_destination_cache();
357 } else
358 rtcache_reset_v4();
362 * Add a route to the appropriate routing table. The address, address zone,
363 * prefix, and RTF_HOST flag in the flags field make up the identity of the
364 * route. If the flags field contains RTF_GATEWAY, a gateway must be given;
365 * otherwise, it must be NULL. The route is associated with the given
366 * interface, which may not be NULL. The caller must ensure that the flags
367 * field does not contain unsupported flags. On success, return OK, and also
368 * also announce the addition. On failure, return a negative error code.
371 route_add(const ip_addr_t * addr, unsigned int prefix,
372 const ip_addr_t * gateway, struct ifdev * ifdev, unsigned int flags,
373 const struct rtsock_request * rtr)
375 struct route_entry *route;
376 unsigned int tree, byte;
377 int r;
379 assert(flags & RTF_UP);
380 assert(!!(flags & RTF_GATEWAY) == (gateway != NULL));
381 assert(ifdev != NULL);
383 /* Get a routing entry, if any are available. */
384 if (SIMPLEQ_EMPTY(&route_freelist))
385 return ENOBUFS;
387 route = SIMPLEQ_FIRST(&route_freelist);
390 * Perform sanity checks on the input, and fill in enough of the
391 * routing entry to be able to try and add it to the routing tree.
393 memset(route->re_addr, 0, sizeof(route->re_addr));
395 tree = route_prepare(addr, prefix, route->re_addr);
397 switch (tree) {
398 case ROUTE_TREE_V4:
399 if (prefix > IP4_BITS ||
400 (prefix != IP4_BITS && (flags & RTF_HOST)))
401 return EINVAL;
403 flags &= ~RTF_IPV6;
405 break;
407 case ROUTE_TREE_V6:
408 if (prefix > IP6_BITS ||
409 (prefix != IP6_BITS && (flags & RTF_HOST)))
410 return EINVAL;
412 flags |= RTF_IPV6;
414 break;
416 default:
417 return EINVAL;
420 /* Generate the (raw) network mask. This is protocol agnostic! */
421 addr_make_netmask(route->re_mask, sizeof(route->re_mask), prefix);
423 /* The given address must be normalized to its mask. */
424 for (byte = 0; byte < __arraycount(route->re_addr); byte++)
425 if ((route->re_addr[byte] & ~route->re_mask[byte]) != 0)
426 return EINVAL;
429 * Attempt to add the routing entry. Host-type entries do not have an
430 * associated mask, enabling ever-so-slightly faster matching.
432 if ((r = rttree_add(&route_tree[tree], &route->re_entry,
433 route->re_addr, (flags & RTF_HOST) ? NULL : route->re_mask,
434 prefix)) != OK)
435 return r;
438 * Success. Finish the routing entry. Remove the entry from the free
439 * list before assigning re_ifdev, as these two use the same memory.
441 SIMPLEQ_REMOVE_HEAD(&route_freelist, re_next);
443 route->re_ifdev = ifdev;
444 route->re_flags = flags;
447 * Store the gateway if one is given. Store the address in lwIP format
448 * because that is the easiest way use it later again. Store it as a
449 * union to keep the route entry structure as small as possible. Store
450 * the address without its zone, because the gateway's address zone is
451 * implied by its associated ifdev.
453 * If no gateway is given, this is a link-type route, i.e., a route for
454 * a local network, with all nodes directly connected and reachable.
456 if (flags & RTF_GATEWAY) {
457 if (flags & RTF_IPV6)
458 ip6_addr_copy_to_packed(route->re_gw6,
459 *ip_2_ip6(gateway));
460 else
461 ip4_addr_copy(route->re_gw4, *ip_2_ip4(gateway));
464 /* We have made routing changes. */
465 route_updated(tree);
467 /* Announce the route addition. */
468 rtsock_msg_route(route, RTM_ADD, rtr);
470 return OK;
474 * Check whether it is possible to add a route for the given destination to the
475 * corresponding routing table, that is, a subsequent route_add() call for this
476 * destination address is guaranteed to succeed (if all its parameters are
477 * valid). Return TRUE if adding the route is guaranteed to succeed, or FALSE
478 * if creating a route for the given destination would fail.
481 route_can_add(const ip_addr_t * addr, unsigned int prefix,
482 int is_host __unused)
484 uint8_t rtaddr[ROUTE_ADDR_MAX];
485 unsigned int tree;
487 tree = route_prepare(addr, prefix, rtaddr);
490 * The corresponding routing tree must not already contain an exact
491 * match for the destination. If the routing tree implementation is
492 * ever extended with support for coexisting host and net entries with
493 * the same prefix, we should also pass in 'is_host' here.
495 if (rttree_lookup_exact(&route_tree[tree], rtaddr, prefix) != NULL)
496 return FALSE;
498 /* There must be a routing entry on the free list as well. */
499 return !SIMPLEQ_EMPTY(&route_freelist);
503 * Find a route with the exact given route identity. Return the route if
504 * found, or NULL if no route exists with this identity.
506 struct route_entry *
507 route_find(const ip_addr_t * addr, unsigned int prefix, int is_host)
509 struct rttree_entry *entry;
510 struct route_entry *route;
511 uint8_t rtaddr[ROUTE_ADDR_MAX];
512 unsigned int tree;
514 tree = route_prepare(addr, prefix, rtaddr);
516 entry = rttree_lookup_exact(&route_tree[tree], rtaddr, prefix);
517 if (entry == NULL)
518 return NULL;
520 route = (struct route_entry *)entry;
523 * As long as the routing tree code does not support coexisting host
524 * and net entries with the same prefix, we have to check the type.
526 if (!!(route->re_flags & RTF_HOST) != is_host)
527 return NULL;
529 return route;
533 * A route lookup failed for the given IP address. Generate an RTM_MISS
534 * message on routing sockets.
536 static void
537 route_miss(const ip_addr_t * ipaddr)
539 union sockaddr_any addr;
540 socklen_t addr_len;
542 addr_len = sizeof(addr);
544 addr_put_inet(&addr.sa, &addr_len, ipaddr, TRUE /*kame*/, 0 /*port*/);
546 rtsock_msg_miss(&addr.sa);
550 * A route lookup failed for the given IPv4 address. Generate an RTM_MISS
551 * message on routing sockets.
553 static void
554 route_miss_v4(const ip4_addr_t * ip4addr)
556 ip_addr_t ipaddr;
558 ip_addr_copy_from_ip4(ipaddr, *ip4addr);
560 route_miss(&ipaddr);
564 * A route lookup failed for the given IPv6 address. Generate an RTM_MISS
565 * message on routing sockets.
567 static void
568 route_miss_v6(const ip6_addr_t * ip6addr)
570 ip_addr_t ipaddr;
572 ip_addr_copy_from_ip6(ipaddr, *ip6addr);
574 route_miss(&ipaddr);
578 * Look up the most narrow matching routing entry for the given IPv4 address.
579 * Return the routing entry if one exists at all, or NULL otherwise. This
580 * function performs caching.
582 static inline struct route_entry *
583 route_lookup_v4(const ip4_addr_t * ip4addr)
585 uint8_t rtaddr[ROUTE_ADDR_MAX];
586 struct route_entry *route;
589 * Look up the route for the destination IP address, unless we have a
590 * cached route entry. We cache negatives in order to avoid generating
591 * lots of RTM_MISS messages for the same destination in a row.
593 if (rtcache_lookup_v4(ip4addr, &route))
594 return route;
596 route_prepare_v4(ip4addr, rtaddr);
598 route = (struct route_entry *)
599 rttree_lookup_match(&route_tree[ROUTE_TREE_V4], rtaddr);
601 /* Cache the result, even if we found no route. */
602 rtcache_add_v4(ip4addr, route);
604 return route;
608 * Look up the most narrow matching routing entry for the given IPv6 address,
609 * taking into account its zone ID if applicable. Return the routing entry if
610 * one exists at all, or NULL otherwise. This function performs caching.
612 static inline struct route_entry *
613 route_lookup_v6(const ip6_addr_t * ip6addr)
615 uint8_t rtaddr[ROUTE_ADDR_MAX];
616 struct route_entry *route;
617 int use_cache;
620 * We do not support caching of addresses that should have a zone but
621 * do not: in different contexts, such addresses could yield different
622 * routes.
624 use_cache = !ip6_addr_lacks_zone(ip6addr, IP6_UNKNOWN);
626 if (use_cache && rtcache_lookup_v6(ip6addr, &route))
627 return route;
629 route_prepare_v6(ip6addr, IP6_BITS, rtaddr);
631 route = (struct route_entry *)
632 rttree_lookup_match(&route_tree[ROUTE_TREE_V6], rtaddr);
634 /* Cache the result, even if no route was found. */
635 if (use_cache)
636 rtcache_add_v6(ip6addr, route);
638 return route;
642 * Look up the most narrow matching routing entry for the given IP address,
643 * taking into account its zone ID if applicable. Return the routing entry if
644 * one exists at all, or NULL otherwise. This function performs caching.
646 struct route_entry *
647 route_lookup(const ip_addr_t * addr)
650 if (IP_IS_V4(addr))
651 return route_lookup_v4(ip_2_ip4(addr));
652 else
653 return route_lookup_v6(ip_2_ip6(addr));
657 * Change an existing routing entry. Its flags are always updated to the new
658 * set of given flags, although certain flags are always preserved. If the
659 * new flags set has RTF_GATEWAY set and 'gateway' is not NULL, update the
660 * gateway associated with the route. If 'ifdev' is not NULL, reassociate the
661 * route with the given interface; this will not affect the zone of the
662 * route's destination address. On success, return OK, and also announce the
663 * change. On failure, return a negative error code.
665 static int
666 route_change(struct route_entry * route, const ip_addr_t * gateway,
667 struct ifdev * ifdev, unsigned int flags,
668 const struct rtsock_request * rtr)
670 unsigned int tree, preserve;
672 tree = (route->re_flags & RTF_IPV6) ? ROUTE_TREE_V6 : ROUTE_TREE_V4;
674 /* Update the associated interface (only) if a new one is given. */
675 if (ifdev != NULL)
676 route->re_ifdev = ifdev;
679 * These flags may not be changed. RTF_UP should always be set anyway.
680 * RTF_HOST and RTF_IPV6 are part of the route's identity. RTF_LOCAL
681 * should be preserved as well, although we will not get here if either
682 * the old or the new flags have it set anyway.
684 preserve = RTF_UP | RTF_HOST | RTF_IPV6 | RTF_LOCAL;
686 /* Always update the flags. There is no way not to. */
687 route->re_flags = (route->re_flags & preserve) | (flags & ~preserve);
690 * If a new gateway is given *and* RTF_GATEWAY is set, update the
691 * gateway. If RTF_GATEWAY is not set, this is a link-type route with
692 * no gateway. If no new gateway is given, we keep the gateway as is.
694 if (gateway != NULL && (flags & RTF_GATEWAY)) {
695 if (flags & RTF_IPV6)
696 ip6_addr_copy_to_packed(route->re_gw6,
697 *ip_2_ip6(gateway));
698 else
699 ip4_addr_copy(route->re_gw4, *ip_2_ip4(gateway));
702 /* We have made routing changes. */
703 route_updated(tree);
705 /* Announce the route change. */
706 rtsock_msg_route(route, RTM_CHANGE, rtr);
708 return OK;
712 * Delete the given route, and announce its deletion.
714 void
715 route_delete(struct route_entry * route, const struct rtsock_request * rtr)
717 unsigned int tree;
719 /* First announce the deletion, while the route is still around. */
720 tree = (route->re_flags & RTF_IPV6) ? ROUTE_TREE_V6 : ROUTE_TREE_V4;
722 rtsock_msg_route(route, RTM_DELETE, rtr);
724 /* Then actually delete the route. */
725 rttree_delete(&route_tree[tree], &route->re_entry);
727 SIMPLEQ_INSERT_HEAD(&route_freelist, route, re_next);
729 /* We have made routing changes. */
730 route_updated(tree);
734 * Delete all routes associated with the given interface, typically as part of
735 * destroying the interface.
737 void
738 route_clear(struct ifdev * ifdev)
740 struct rttree_entry *entry, *parent;
741 struct route_entry *route;
742 unsigned int tree;
745 * Delete all routes associated with the given interface. Fortunately,
746 * we need not also delete addresses zoned to the given interface,
747 * because no route can be created with a zone ID that does not match
748 * the associated interface. That is the main reason why we ignore
749 * zone IDs for gateways when adding or changing routes..
751 for (tree = 0; tree < NR_ROUTE_TREE; tree++) {
752 parent = NULL;
754 while ((entry = rttree_enum(&route_tree[tree],
755 parent)) != NULL) {
756 route = (struct route_entry *)entry;
758 if (route->re_ifdev == ifdev)
759 route_delete(route, NULL /*request*/);
760 else
761 parent = entry;
767 * Process a routing command specifically for an IPv4 or IPv6 route, as one of
768 * the specific continuations of processing started by route_process(). The
769 * RTM_ routing command is given as 'type'. The route destination is given as
770 * 'dst_addr'; its address type determines whether the operation is for IPv4 or
771 * IPv6. The sockaddr structures for 'mask' and 'gateway' are passed on as is
772 * and may have to be parsed here if not NULL. 'ifdev' is the interface to be
773 * associated with a route; it is non-NULL only if an interface name (IFP) or
774 * address (IFA) was given. The RTF_ flags field 'flags' has been checked
775 * against the globally supported flags, but may have to be checked for flags
776 * that do not apply to IPv4/IPv6 routes. Return OK or a negative error code,
777 * following the same semantics as route_process().
779 static int
780 route_process_inet(unsigned int type, const ip_addr_t * dst_addr,
781 const struct sockaddr * mask, const struct sockaddr * gateway,
782 struct ifdev * ifdev, unsigned int flags,
783 const struct rtsock_request * rtr)
785 struct route_entry *route;
786 ip_addr_t gw_storage, *gw_addr;
787 struct ifdev *ifdev2;
788 uint32_t zone;
789 unsigned int prefix;
790 int r;
792 assert(!(flags & RTF_LLDATA));
794 if ((flags & (RTF_DYNAMIC | RTF_MODIFIED | RTF_DONE | RTF_XRESOLVE |
795 RTF_LLINFO | RTF_CLONED | RTF_SRC | RTF_ANNOUNCE |
796 RTF_BROADCAST)) != 0)
797 return EINVAL;
800 * For network entries, a network mask must be provided in all cases.
801 * For host entries, the network mask is ignored, and we use a prefix
802 * with all bits set.
804 if (!(flags & RTF_HOST)) {
805 if (mask == NULL)
806 return EINVAL;
808 if ((r = addr_get_netmask(mask, mask->sa_len,
809 IP_GET_TYPE(dst_addr), &prefix, NULL /*ipaddr*/)) != OK)
810 return r;
811 } else {
812 if (IP_IS_V4(dst_addr))
813 prefix = IP4_BITS;
814 else
815 prefix = IP6_BITS;
818 gw_addr = NULL;
821 * Determine the gateway and interface for the routing entry, if
822 * applicable.
824 if (type == RTM_ADD || type == RTM_CHANGE) {
826 * The RTF_UP flag must always be set, but only if the flags
827 * field is used at all.
829 if (!(flags & RTF_UP))
830 return EINVAL;
832 if ((flags & RTF_GATEWAY) && gateway != NULL) {
833 if ((r = addr_get_inet(gateway, gateway->sa_len,
834 IP_GET_TYPE(dst_addr), &gw_storage, TRUE /*kame*/,
835 NULL /*port*/)) != OK)
836 return r;
838 gw_addr = &gw_storage;
841 * We use the zone of the gateway to help determine the
842 * interface, but we do not reject a mismatching zone
843 * here. The reason for this is that we do not want
844 * routes that have zones for an interface other than
845 * the one associated with the route, as that could
846 * create a world of trouble: packets leaving their
847 * zone, complications with cleaning up interfaces..
849 if (IP_IS_V6(gw_addr) &&
850 ip6_addr_has_zone(ip_2_ip6(gw_addr))) {
851 zone = ip6_addr_zone(ip_2_ip6(gw_addr));
853 ifdev2 = ifdev_get_by_index(zone);
855 if (ifdev != NULL && ifdev != ifdev2)
856 return EINVAL;
857 else
858 ifdev = ifdev2;
862 * If we still have no interface at this point, see if
863 * we can find one based on just the gateway address.
864 * See if a locally attached network owns the address.
865 * That may not succeed, leaving ifdev set to NULL.
867 if (ifdev == NULL)
868 ifdev = ifaddr_map_by_subnet(gw_addr);
872 * When adding routes, all necessary information must be given.
873 * When changing routes, we can leave some settings as is.
875 if (type == RTM_ADD) {
876 if ((flags & RTF_GATEWAY) && gw_addr == NULL)
877 return EINVAL;
879 /* TODO: try harder to find a matching interface.. */
880 if (ifdev == NULL)
881 return ENETUNREACH;
886 * All route commands except RTM_ADD require that a route exists for
887 * the given identity, although RTM_GET, when requesting a host entry,
888 * may return a wider (network) route based on just the destination
889 * address.
891 if (type != RTM_ADD) {
892 /* For RTM_GET (only), a host query may return a net route. */
893 if (type == RTM_GET && (flags & RTF_HOST))
894 route = route_lookup(dst_addr);
895 else
896 route = route_find(dst_addr, prefix,
897 !!(flags & RTF_HOST));
899 if (route == NULL)
900 return ESRCH;
901 } else
902 route = NULL;
904 /* Process the actual routing command. */
905 switch (type) {
906 case RTM_ADD:
907 return route_add(dst_addr, prefix, gw_addr, ifdev, flags, rtr);
909 case RTM_CHANGE:
910 /* Routes for local addresses are immutable. */
911 if (route_is_immutable(route))
912 return EPERM;
914 return route_change(route, gw_addr, ifdev, flags, rtr);
916 case RTM_DELETE:
917 /* Routes for local addresses are immutable. */
918 if (route_is_immutable(route))
919 return EPERM;
921 route_delete(route, rtr);
923 return OK;
925 case RTM_LOCK:
927 * TODO: implement even the suggestion that we support this.
928 * For now, we do not keep per-route metrics, let alone change
929 * them dynamically ourselves, so "locking" metrics is really
930 * not a concept that applies to us. We may however have to
931 * save the lock mask and return it in queries..
933 /* FALLTHROUGH */
934 case RTM_GET:
935 /* Simply generate a message for the route we just found. */
936 rtsock_msg_route(route, type, rtr);
938 return OK;
940 default:
941 return EINVAL;
946 * Process a routing command from a routing socket. The RTM_ type of command
947 * is given as 'type', and is one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_GET,
948 * RTM_LOCK. In addition, the function takes a set of sockaddr pointers as
949 * provided by the routing command. Each of these sockaddr pointers may be
950 * NULL; if not NULL, the structure is at least large enough to contain the
951 * address length (sa_len) and family (sa_family), and the length never exceeds
952 * the amount of memory used to store the sockaddr structure. However, the
953 * length itself has not yet been checked against the expected protocol
954 * structure and could even be zero. The command's RTF_ routing flags and
955 * metrics are provided as well. On success, return OK, in which case the
956 * caller assumes that a routing socket announcement for the processed command
957 * has been sent already (passing on 'rtr' to the announcement function as is).
958 * On failure, return a negative error code; in that case, the caller will send
959 * a failure response on the original routing socket itself.
962 route_process(unsigned int type, const struct sockaddr * dst,
963 const struct sockaddr * mask, const struct sockaddr * gateway,
964 const struct sockaddr * ifp, const struct sockaddr * ifa,
965 unsigned int flags, unsigned long inits,
966 const struct rt_metrics * rmx, const struct rtsock_request * rtr)
968 struct ifdev *ifdev, *ifdev2;
969 char name[IFNAMSIZ];
970 ip_addr_t dst_addr, if_addr;
971 uint32_t zone;
972 uint8_t addr_type;
973 int r;
976 * The identity of a route is determined by its destination address,
977 * destination zone, prefix length, and whether it is a host entry
978 * or not. If it is a host entry (RTF_HOST is set), the prefix length
979 * is implied by the protocol; otherwise it should be obtained from the
980 * given netmask if necessary. For link-local addresses, the zone ID
981 * must be embedded KAME-style in the destination address. A
982 * destination address must always be given. The destination address
983 * also determines the overall address family.
985 if (dst == NULL)
986 return EINVAL;
988 switch (dst->sa_family) {
989 case AF_INET:
990 addr_type = IPADDR_TYPE_V4;
991 break;
992 #ifdef INET6
993 case AF_INET6:
994 addr_type = IPADDR_TYPE_V6;
995 break;
996 #endif /* INET6 */
997 default:
998 return EAFNOSUPPORT;
1001 if ((r = addr_get_inet(dst, dst->sa_len, addr_type, &dst_addr,
1002 TRUE /*kame*/, NULL /*port*/)) != OK)
1003 return r;
1006 * Perform a generic test on the given flags. This covers everything
1007 * we support at all, plus a few flags we ignore. Specific route types
1008 * may have further restrictions; those tests are performed later.
1010 if ((flags & ~(RTF_UP | RTF_GATEWAY | RTF_HOST | RTF_REJECT |
1011 RTF_CLONING | RTF_LLINFO | RTF_LLDATA | RTF_STATIC |
1012 RTF_BLACKHOLE | RTF_CLONED | RTF_PROTO2 | RTF_PROTO1)) != 0)
1013 return EINVAL;
1015 ifdev = NULL;
1017 if (type == RTM_ADD || type == RTM_CHANGE) {
1019 * If an interface address or name is given, use that to
1020 * identify the target interface. If both are given, make sure
1021 * that both identify the same interface--a hopefully helpful
1022 * feature to detect wrong route(8) usage (NetBSD simply takes
1023 * IFP over IFA). An empty interface name is ignored on the
1024 * basis that libc link_addr(3) is broken.
1026 if (ifp != NULL) {
1027 if ((r = addr_get_link(ifp, ifp->sa_len, name,
1028 sizeof(name), NULL /*hwaddr*/,
1029 0 /*hwaddr_len*/)) != OK)
1030 return r;
1032 if (name[0] != '\0' &&
1033 (ifdev = ifdev_find_by_name(name)) == NULL)
1034 return ENXIO;
1037 if (ifa != NULL) {
1039 * This is similar to retrieval of source addresses in
1040 * ipsock, with the difference that we do not impose
1041 * that a zone ID be given for link-local addresses.
1043 if ((r = addr_get_inet(ifa, ifa->sa_len, addr_type,
1044 &if_addr, TRUE /*kame*/, NULL /*port*/)) != OK)
1045 return r;
1047 if ((ifdev2 = ifaddr_map_by_addr(&if_addr)) == NULL)
1048 return EADDRNOTAVAIL;
1050 if (ifdev != NULL && ifdev != ifdev2)
1051 return EINVAL;
1052 else
1053 ifdev = ifdev2;
1057 * If the destination address has a zone, then it must not
1058 * conflict with the interface, if one was given. If not, we
1059 * may use it to decide the interface to use for the route.
1061 if (IP_IS_V6(&dst_addr) &&
1062 ip6_addr_has_zone(ip_2_ip6(&dst_addr))) {
1063 if (ifdev == NULL) {
1064 zone = ip6_addr_zone(ip_2_ip6(&dst_addr));
1066 ifdev = ifdev_get_by_index(zone);
1067 } else {
1068 if (!ip6_addr_test_zone(ip_2_ip6(&dst_addr),
1069 ifdev_get_netif(ifdev)))
1070 return EADDRNOTAVAIL;
1076 * For now, no initializers are supported by any of the sub-processing
1077 * routines, so outright reject requests that set any initializers.
1078 * Most importantly, we do not support per-route MTU settings (RTV_MTU)
1079 * because lwIP would not use them, and we do not support non-zero
1080 * expiry (RTV_EXPIRE) because for IPv4/IPv6 routes it is not a widely
1081 * used feature and for ARP/NDP we would have to change lwIP.
1082 * dhcpcd(8) does supply RTV_MTU, we have to ignore that option rather
1083 * than reject it, unfortunately. arp(8) always sets RTV_EXPIRE, so we
1084 * reject only non-zero expiry there.
1086 if ((inits & ~(RTV_EXPIRE | RTV_MTU)) != 0 ||
1087 ((inits & RTV_EXPIRE) != 0 && rmx->rmx_expire != 0))
1088 return ENOSYS;
1091 * From here on, the processing differs for ARP, NDP, and IP routes.
1092 * As of writing, our userland is from NetBSD 7, which puts link-local
1093 * route entries in its main route tables. This means we would have to
1094 * search for existing routes before we can determine whether, say, a
1095 * RTM_GET request is for an IP or an ARP route entry. As of NetBSD 8,
1096 * the link-local administration is separated, and all requests use the
1097 * RTF_LLDATA flag to indicate that they are for ARP/NDP routes rather
1098 * than IP routes. Since that change makes things much cleaner for us,
1099 * we borrow from the future, patching arp(8) and ndp(8) to add the
1100 * RTF_LLDATA flag now, so that we can implement a clean split here.
1102 if (!(flags & RTF_LLDATA))
1103 return route_process_inet(type, &dst_addr, mask, gateway,
1104 ifdev, flags, rtr);
1105 else
1106 return lldata_process(type, &dst_addr, gateway, ifdev, flags,
1107 rtr);
1111 * Return the routing flags (RTF_) for the given routing entry. Strip out any
1112 * internal flags.
1114 unsigned int
1115 route_get_flags(const struct route_entry * route)
1118 return route->re_flags & ~RTF_IPV6;
1122 * Return TRUE if the given routing entry is for the IPv6 address family, or
1123 * FALSE if it is for IPv4.
1126 route_is_ipv6(const struct route_entry * route)
1129 return !!(route->re_flags & RTF_IPV6);
1133 * Return the interface associated with the given routing entry. The resulting
1134 * interface is never NULL.
1136 struct ifdev *
1137 route_get_ifdev(const struct route_entry * route)
1140 return route->re_ifdev;
1144 * Convert the given raw routing address pointed to by 'rtaddr' into a
1145 * lwIP-style IP address 'ipaddr' of type 'type', which must by IPADDR_TYPE_V4
1146 * or IPADDR_TYPE_V6.
1148 static void
1149 route_get_addr(ip_addr_t * ipaddr, const uint8_t * rtaddr, uint8_t type)
1151 ip6_addr_t *ip6addr;
1152 uint32_t val, zone;
1155 * Convert the routing address to a lwIP-type IP address. Take out the
1156 * KAME-style embedded zone, if needed.
1158 memset(ipaddr, 0, sizeof(*ipaddr));
1159 IP_SET_TYPE(ipaddr, type);
1161 switch (type) {
1162 case IPADDR_TYPE_V4:
1163 memcpy(&val, rtaddr, sizeof(val));
1165 ip_addr_set_ip4_u32(ipaddr, val);
1167 break;
1169 case IPADDR_TYPE_V6:
1170 ip6addr = ip_2_ip6(ipaddr);
1172 memcpy(ip6addr->addr, rtaddr, sizeof(ip6addr->addr));
1174 if (ip6_addr_has_scope(ip6addr, IP6_UNKNOWN)) {
1175 zone = ntohl(ip6addr->addr[0]) & 0x0000ffffU;
1177 ip6addr->addr[0] &= PP_HTONL(0xffff0000U);
1179 ip6_addr_set_zone(ip6addr, zone);
1182 break;
1184 default:
1185 panic("unknown IP address type: %u", type);
1190 * Obtain information about an IPv4 or IPv6 routing entry, by filling 'addr',
1191 * 'mask', 'gateway', and optionally (if not NULL) 'ifp' and 'ifa' with
1192 * sockaddr-type data for each of those fields. Also store the associated
1193 * interface in 'ifdevp', the routing entry's flags in 'flags', and the route's
1194 * usage count in 'use'.
1196 void
1197 route_get(const struct route_entry * route, union sockaddr_any * addr,
1198 union sockaddr_any * mask, union sockaddr_any * gateway,
1199 union sockaddr_any * ifp, union sockaddr_any * ifa,
1200 struct ifdev ** ifdevp, unsigned int * flags, unsigned int * use)
1202 const ip_addr_t *src_addr;
1203 ip_addr_t dst_addr, gw_addr;
1204 struct ifdev *ifdev;
1205 socklen_t addr_len;
1206 uint8_t type;
1208 type = (route->re_flags & RTF_IPV6) ? IPADDR_TYPE_V6 : IPADDR_TYPE_V4;
1210 /* Get the destination address. */
1211 route_get_addr(&dst_addr, route->re_addr, type);
1213 addr_len = sizeof(*addr);
1215 addr_put_inet(&addr->sa, &addr_len, &dst_addr, TRUE /*kame*/,
1216 0 /*port*/);
1218 /* Get the network mask, if applicable. */
1219 if (!(route->re_flags & RTF_HOST)) {
1220 addr_len = sizeof(*mask);
1222 addr_put_netmask(&mask->sa, &addr_len, type,
1223 rttree_get_prefix(&route->re_entry));
1224 } else
1225 mask->sa.sa_len = 0;
1227 /* Get the gateway, which may be an IP address or a local link. */
1228 addr_len = sizeof(*gateway);
1230 ifdev = route->re_ifdev;
1232 if (route->re_flags & RTF_GATEWAY) {
1233 if (type == IPADDR_TYPE_V4)
1234 ip_addr_copy_from_ip4(gw_addr, route->re_gw4);
1235 else
1236 ip_addr_copy_from_ip6_packed(gw_addr, route->re_gw6);
1238 addr_put_inet(&gateway->sa, &addr_len, &gw_addr, TRUE /*kame*/,
1239 0 /*port*/);
1240 } else {
1241 addr_put_link(&gateway->sa, &addr_len, ifdev_get_index(ifdev),
1242 ifdev_get_iftype(ifdev), NULL /*name*/, NULL /*hwaddr*/,
1243 0 /*hwaddr_len*/);
1246 /* Get the associated interface name. */
1247 if (ifp != NULL) {
1248 addr_len = sizeof(*ifp);
1250 addr_put_link(&ifp->sa, &addr_len, ifdev_get_index(ifdev),
1251 ifdev_get_iftype(ifdev), ifdev_get_name(ifdev),
1252 NULL /*hwaddr*/, 0 /*hwaddr_len*/);
1255 /* Get the associated source address, if we can determine one. */
1256 if (ifa != NULL) {
1257 src_addr = ifaddr_select(&dst_addr, ifdev, NULL /*ifdevp*/);
1259 if (src_addr != NULL) {
1260 addr_len = sizeof(*ifa);
1262 addr_put_inet(&ifa->sa, &addr_len, src_addr,
1263 TRUE /*kame*/, 0 /*port*/);
1264 } else
1265 ifa->sa.sa_len = 0;
1268 /* Get other fields. */
1269 *flags = route_get_flags(route); /* strip any internal flags */
1270 *ifdevp = ifdev;
1271 *use = route->re_use;
1275 * Enumerate IPv4 routing entries. Return the first IPv4 routing entry if
1276 * 'last' is NULL, or the next routing entry after 'last' if it is not NULL.
1277 * In both cases, the return value may be NULL if there are no more routes.
1279 struct route_entry *
1280 route_enum_v4(struct route_entry * last)
1283 assert(last == NULL || !(last->re_flags & RTF_IPV6));
1285 return (struct route_entry *)rttree_enum(&route_tree[ROUTE_TREE_V4],
1286 (last != NULL) ? &last->re_entry : NULL);
1290 * Enumerate IPv6 routing entries. Return the first IPv6 routing entry if
1291 * 'last' is NULL, or the next routing entry after 'last' if it is not NULL.
1292 * In both cases, the return value may be NULL if there are no more routes.
1294 struct route_entry *
1295 route_enum_v6(struct route_entry * last)
1298 assert(last == NULL || (last->re_flags & RTF_IPV6));
1300 return (struct route_entry *)rttree_enum(&route_tree[ROUTE_TREE_V6],
1301 (last != NULL) ? &last->re_entry : NULL);
1305 * lwIP IPv4 routing function. Given an IPv4 destination address, look up and
1306 * return the target interface, or NULL if there is no route to the address.
1308 * This is a full replacement of the corresponding lwIP function, which should
1309 * be overridden with weak symbols, using patches against the lwIP source code.
1310 * As such, the lwIP headers should already provide the correct prototype for
1311 * this function. If not, something will have changed in the lwIP
1312 * implementation, and this code must be revised accordingly.
1314 struct netif *
1315 ip4_route(const ip4_addr_t * dst)
1317 struct route_entry *route;
1318 struct ifdev *ifdev;
1321 * Look up the route for the destination IPv4 address. If no route is
1322 * found at all, return NULL to the caller.
1324 if ((route = route_lookup_v4(dst)) == NULL) {
1325 route_miss_v4(dst);
1327 return NULL;
1331 * For now, we increase the use counter only for actual route lookups,
1332 * and not for gateway lookups or user queries. As of writing,
1333 * route(8) does not print this number anyway..
1335 route->re_use++;
1338 * For all packets that are supposed to be rejected or blackholed, use
1339 * a loopback interface, regardless of the interface to which the route
1340 * is associated (even though it will typically be lo0 anyway). The
1341 * reason for this is that on packet output, we perform another route
1342 * route lookup just to check for rejection/blackholing, but for
1343 * efficiency reasons, we limit such checks to loopback interfaces:
1344 * loopback traffic will typically use only one IP address anyway, thus
1345 * limiting route misses from such rejection/blackhole route lookups as
1346 * much as we can. The lookup is implemented in route_output_v4(). We
1347 * divert only if the target interface is not a loopback interface
1348 * already, mainly to allow userland tests to create blackhole routes
1349 * to a specific loopback interface for testing purposes.
1351 * It is not correct to return NULL for RTF_REJECT routes here, because
1352 * this could cause e.g. connect() calls to fail immediately, which is
1353 * not how rejection should work. Related: a previous incarnation of
1354 * support for these flags used a dedicated netif to eliminate the
1355 * extra route lookup on regular output altogether, but in the current
1356 * situation, that netif would have to be assigned (IPv4 and IPv6)
1357 * addresses in order not to break e.g. connect() in the same way.
1359 if ((route->re_flags & (RTF_REJECT | RTF_BLACKHOLE)) &&
1360 !ifdev_is_loopback(route->re_ifdev))
1361 ifdev = ifdev_get_loopback();
1362 else
1363 ifdev = route->re_ifdev;
1365 return ifdev_get_netif(ifdev);
1369 * lwIP IPv4 routing hook. Since this hook is called only from lwIP's own
1370 * ip4_route() implementation, this hook must never fire. If it does, either
1371 * something is wrong with overriding ip4_route(), or lwIP added other places
1372 * from which this hook is called. Both cases are highly problematic and must
1373 * be resolved somehow, which is why we simply call panic() here.
1375 struct netif *
1376 lwip_hook_ip4_route(const ip4_addr_t * dst)
1379 panic("IPv4 routing hook called - this should not happen!");
1383 * lwIP IPv4 ARP gateway hook.
1385 const ip4_addr_t *
1386 lwip_hook_etharp_get_gw(struct netif * netif, const ip4_addr_t * ip4addr)
1388 static ip4_addr_t gw_addr; /* may be returned to the caller */
1389 struct route_entry *route;
1391 /* Look up the route for the destination IP address. */
1392 if ((route = route_lookup_v4(ip4addr)) == NULL)
1393 return NULL;
1396 * This case could only ever trigger as a result of lwIP taking its own
1397 * routing decisions instead of calling the IPv4 routing hook. While
1398 * not impossible, such cases should be extremely rare. We cannot
1399 * provide a meaningful gateway address in this case either, though.
1401 if (route->re_ifdev != netif_get_ifdev(netif)) {
1402 printf("LWIP: unexpected interface for gateway lookup\n");
1404 return NULL;
1408 * If this route has a gateway, return the IP address of the gateway.
1409 * Otherwise, the route is for a local network, and we would typically
1410 * not get here because lwIP performs the local-network check itself.
1411 * It is possible that the local network consists of more than one IP
1412 * range, and the user has configured a route for the other range. In
1413 * that case, return the IP address of the actual destination.
1415 * We store a packed version of the IPv4 address, so reconstruct the
1416 * unpacked version to a static variable first - for consistency with
1417 * the IPv6 code.
1419 if (route->re_flags & RTF_GATEWAY) {
1420 ip4_addr_copy(gw_addr, route->re_gw4);
1422 return &gw_addr;
1423 } else
1424 return ip4addr;
1428 * lwIP IPv6 routing function. Given an IPv6 source and destination address,
1429 * look up and return the target interface, or NULL if there is no route to the
1430 * address. Our routing algorithm is destination-based, meaning that the
1431 * source address must be considered only to resolve zone ambiguity.
1433 * This is a full replacement of the corresponding lwIP function, which should
1434 * be overridden with weak symbols, using patches against the lwIP source code.
1435 * As such, the lwIP headers should already provide the correct prototype for
1436 * this function. If not, something will have changed in the lwIP
1437 * implementation, and this code must be revised accordingly.
1439 struct netif *
1440 ip6_route(const ip6_addr_t * src, const ip6_addr_t * dst)
1442 struct route_entry *route;
1443 struct ifdev *ifdev;
1444 ip6_addr_t dst_addr;
1445 uint32_t zone;
1447 assert(src != NULL);
1448 assert(dst != NULL);
1451 * If the destination address is scoped but has no zone, use the source
1452 * address to determine a zone, which we then set on the destination
1453 * address to find the route, if successful. Obviously, the interface
1454 * is not going to be different from the zone, but we do need to check
1455 * other aspects of the route (e.g., one might want to null-route all
1456 * multicast traffic). In the case that no source address is given at
1457 * all, first see if the destination address happens to be a locally
1458 * assigned address. In theory this could yield multiple matches, so
1459 * pick the first one. If not even that helps, we have absolutely
1460 * nothing we can use to refine route selection. We could pick an
1461 * arbitrary interface in that case, but we currently don't.
1463 zone = IP6_NO_ZONE;
1465 if (ip6_addr_lacks_zone(dst, IP6_UNKNOWN)) {
1466 if (ip6_addr_has_zone(src))
1467 zone = ip6_addr_zone(src);
1468 else if (!ip6_addr_isany(src)) {
1469 if ((ifdev = ifaddr_v6_map_by_addr(src)) == NULL)
1470 return NULL; /* should never happen */
1471 zone = ifdev_get_index(ifdev);
1472 } else {
1473 if ((ifdev = ifaddr_v6_map_by_addr(dst)) != NULL)
1474 zone = ifdev_get_index(ifdev);
1475 else
1476 return NULL; /* TODO: try harder */
1479 if (zone != IP6_NO_ZONE) {
1480 dst_addr = *dst;
1482 ip6_addr_set_zone(&dst_addr, zone);
1484 dst = &dst_addr;
1488 route = route_lookup_v6(dst);
1491 * Look up the route for the destination IPv6 address. If no route is
1492 * found at all, return NULL to the caller.
1494 if (route == NULL) {
1496 * Since we rely on userland to create routes for on-link
1497 * prefixes and default routers, we do not have to call lwIP's
1498 * nd6_find_route() here.
1501 /* Generate an RTM_MISS message. */
1502 route_miss_v6(dst);
1504 return NULL;
1508 * We have found a route based on the destination address. If we did
1509 * not pick the destination address zone based on the source address,
1510 * we should now check for source address zone violations. Note that
1511 * if even the destination address zone violates its target interface,
1512 * this case will be caught by route_lookup_v6().
1514 if (zone == IP6_NO_ZONE &&
1515 ifaddr_is_zone_mismatch(src, route->re_ifdev))
1516 return NULL;
1518 route->re_use++;
1521 * See ip4_route() for an explanation of the use of loopback here. For
1522 * the IPv6 case, the matching logic is in route_output_v6().
1524 if ((route->re_flags & (RTF_REJECT | RTF_BLACKHOLE)) &&
1525 !ifdev_is_loopback(route->re_ifdev))
1526 ifdev = ifdev_get_loopback();
1527 else
1528 ifdev = route->re_ifdev;
1531 * If the selected interface would cause the destination address to
1532 * leave its zone, fail route selection altogether. This case may
1533 * trigger especially for reject routes, for which the interface change
1534 * to loopback may introduce a zone violation.
1536 if (ip6_addr_has_zone(dst) &&
1537 !ip6_addr_test_zone(dst, ifdev_get_netif(ifdev)))
1538 return NULL;
1540 return ifdev_get_netif(ifdev);
1544 * lwIP IPv6 (source) routing hook. Since this hook is called only from lwIP's
1545 * own ip6_route() implementation, this hook must never fire. If it does,
1546 * either something is wrong with overriding ip6_route(), or lwIP added other
1547 * places from which this hook is called. Both cases are highly problematic
1548 * and must be resolved somehow, which is why we simply call panic() here.
1550 struct netif *
1551 lwip_hook_ip6_route(const ip6_addr_t * src, const ip6_addr_t * dst)
1554 panic("IPv6 routing hook called - this should not happen!");
1558 * lwIP IPv6 ND6 gateway hook.
1560 const ip6_addr_t *
1561 lwip_hook_nd6_get_gw(struct netif * netif, const ip6_addr_t * ip6addr)
1563 static ip6_addr_t gw_addr; /* may be returned to the caller */
1564 struct route_entry *route;
1565 struct ifdev *ifdev;
1567 ifdev = netif_get_ifdev(netif);
1568 assert(ifdev != NULL);
1570 /* Look up the route for the destination IP address. */
1571 if ((route = route_lookup_v6(ip6addr)) == NULL)
1572 return NULL;
1574 /* As for IPv4. */
1575 if (route->re_ifdev != ifdev) {
1576 printf("LWIP: unexpected interface for gateway lookup\n");
1578 return NULL;
1582 * We save memory by storing a packed (zoneless) version of the IPv6
1583 * gateway address. That means we cannot return a pointer to it here.
1584 * Instead, we have to resort to expanding the address into a static
1585 * variable. The caller will immediately make a copy anyway, though.
1587 if (route->re_flags & RTF_GATEWAY) {
1588 ip6_addr_copy_from_packed(gw_addr, route->re_gw6);
1589 ip6_addr_assign_zone(&gw_addr, IP6_UNKNOWN, netif);
1591 return &gw_addr;
1592 } else
1593 return ip6addr;
1597 * Check whether a packet is allowed to be sent to the given destination IPv4
1598 * address 'ipaddr' on the interface 'ifdev', according to route information.
1599 * Return TRUE if the packet should be sent. Return FALSE if the packet should
1600 * be rejected or discarded, with 'err' set to the error to return to lwIP.
1603 route_output_v4(struct ifdev * ifdev, const ip4_addr_t * ipaddr, err_t * err)
1605 const struct route_entry *route;
1607 /* See if we should reject/blackhole packets to this destination. */
1608 if (ifdev_is_loopback(ifdev) &&
1609 (route = route_lookup_v4(ipaddr)) != NULL &&
1610 (route->re_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
1611 if (route->re_flags & RTF_REJECT)
1612 *err = ERR_RTE;
1613 else
1614 *err = ERR_OK;
1616 return FALSE;
1619 return TRUE;
1623 * Check whether a packet is allowed to be sent to the given destination IPv6
1624 * address 'ipaddr' on the interface 'ifdev', according to route information.
1625 * Return TRUE if the packet should be sent. Return FALSE if the packet should
1626 * be rejected or discarded, with 'err' set to the error to return to lwIP.
1629 route_output_v6(struct ifdev * ifdev, const ip6_addr_t * ipaddr, err_t * err)
1631 const struct route_entry *route;
1633 /* Do one more zone violation test, just in case. It's cheap. */
1634 if (ip6_addr_has_zone(ipaddr) &&
1635 !ip6_addr_test_zone(ipaddr, ifdev_get_netif(ifdev))) {
1636 *err = ERR_RTE;
1638 return FALSE;
1641 /* See if we should reject/blackhole packets to this destination. */
1642 if (ifdev_is_loopback(ifdev) &&
1643 (route = route_lookup_v6(ipaddr)) != NULL &&
1644 (route->re_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
1645 if (route->re_flags & RTF_REJECT)
1646 *err = ERR_RTE;
1647 else
1648 *err = ERR_OK;
1650 return FALSE;
1653 return TRUE;